Merge pull request !4227 from panbingao/pylintfix (tags/v0.7.0-beta)
| @@ -13,10 +13,10 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Dataset module.""" | """Dataset module.""" | ||||
| import numpy as np | |||||
| from PIL import Image | from PIL import Image | ||||
| import mindspore.dataset as de | import mindspore.dataset as de | ||||
| import mindspore.dataset.transforms.vision.c_transforms as C | import mindspore.dataset.transforms.vision.c_transforms as C | ||||
| import numpy as np | |||||
| from .ei_dataset import HwVocRawDataset | from .ei_dataset import HwVocRawDataset | ||||
| from .utils import custom_transforms as tr | from .utils import custom_transforms as tr | ||||
| @@ -110,8 +110,6 @@ class LossCallBack(Callback): | |||||
| class LossNet(nn.Cell): | class LossNet(nn.Cell): | ||||
| """FasterRcnn loss method""" | """FasterRcnn loss method""" | ||||
| def __init__(self): | |||||
| super(LossNet, self).__init__() | |||||
| def construct(self, x1, x2, x3, x4, x5, x6): | def construct(self, x1, x2, x3, x4, x5, x6): | ||||
| return x1 + x2 | return x1 + x2 | ||||
| @@ -117,8 +117,6 @@ class LossCallBack(Callback): | |||||
| class LossNet(nn.Cell): | class LossNet(nn.Cell): | ||||
| """MaskRcnn loss method""" | """MaskRcnn loss method""" | ||||
| def __init__(self): | |||||
| super(LossNet, self).__init__() | |||||
| def construct(self, x1, x2, x3, x4, x5, x6, x7): | def construct(self, x1, x2, x3, x4, x5, x6, x7): | ||||
| return x1 + x2 | return x1 + x2 | ||||
| @@ -20,8 +20,8 @@ from __future__ import division | |||||
| import os | import os | ||||
| import json | import json | ||||
| import xml.etree.ElementTree as et | import xml.etree.ElementTree as et | ||||
| import cv2 | |||||
| import numpy as np | import numpy as np | ||||
| import cv2 | |||||
| import mindspore.dataset as de | import mindspore.dataset as de | ||||
| import mindspore.dataset.transforms.vision.c_transforms as C | import mindspore.dataset.transforms.vision.c_transforms as C | ||||
| @@ -14,8 +14,8 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| """Parameters utils""" | """Parameters utils""" | ||||
| from mindspore.common.initializer import initializer, TruncatedNormal | |||||
| import numpy as np | import numpy as np | ||||
| from mindspore.common.initializer import initializer, TruncatedNormal | |||||
| def init_net_param(network, initialize_mode='TruncatedNormal'): | def init_net_param(network, initialize_mode='TruncatedNormal'): | ||||
| """Init the parameters in net.""" | """Init the parameters in net.""" | ||||
| @@ -13,6 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Automatic differentiation with grad clip.""" | """Automatic differentiation with grad clip.""" | ||||
| import numpy as np | |||||
| from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean, | from mindspore.parallel._utils import (_get_device_num, _get_mirror_mean, | ||||
| _get_parallel_mode) | _get_parallel_mode) | ||||
| from mindspore.train.parallel_utils import ParallelMode | from mindspore.train.parallel_utils import ParallelMode | ||||
| @@ -24,7 +25,6 @@ from mindspore.nn.cell import Cell | |||||
| from mindspore.nn.wrap.grad_reducer import DistributedGradReducer | from mindspore.nn.wrap.grad_reducer import DistributedGradReducer | ||||
| import mindspore.nn as nn | import mindspore.nn as nn | ||||
| from mindspore.common.tensor import Tensor | from mindspore.common.tensor import Tensor | ||||
| import numpy as np | |||||
| compute_norm = C.MultitypeFuncGraph("compute_norm") | compute_norm = C.MultitypeFuncGraph("compute_norm") | ||||
| @@ -297,6 +297,9 @@ class AttentionHead(nn.Cell): | |||||
| self.activation = activation | self.activation = activation | ||||
| def construct(self, input_feature, bias_mat, training=True): | def construct(self, input_feature, bias_mat, training=True): | ||||
| """ | |||||
| Attention Head for Graph Attention Networks. | |||||
| """ | |||||
| if training is True: | if training is True: | ||||
| input_feature = self.in_drop(input_feature) | input_feature = self.in_drop(input_feature) | ||||
| @@ -38,7 +38,7 @@ class MaskedSoftMaxLoss(nn.Cell): | |||||
| self.num_params = len(self.params) | self.num_params = len(self.params) | ||||
| def construct(self, logits): | def construct(self, logits): | ||||
| # calc l2 loss | |||||
| """calc l2 loss""" | |||||
| l2_loss = 0 | l2_loss = 0 | ||||
| for i in range(self.num_params): | for i in range(self.num_params): | ||||
| l2_loss = l2_loss + self.l2_coeff * P.L2Loss()(self.params[i]) | l2_loss = l2_loss + self.l2_coeff * P.L2Loss()(self.params[i]) | ||||
| @@ -69,6 +69,7 @@ class MaskedAccuracy(nn.Cell): | |||||
| self.mask = Tensor(mask, dtype=mstype.float32) | self.mask = Tensor(mask, dtype=mstype.float32) | ||||
| def construct(self, logits): | def construct(self, logits): | ||||
| """Calculate accuracy""" | |||||
| logits = P.Reshape()(logits, (-1, self.num_class)) | logits = P.Reshape()(logits, (-1, self.num_class)) | ||||
| labels = P.Reshape()(self.label, (-1, self.num_class)) | labels = P.Reshape()(self.label, (-1, self.num_class)) | ||||
| mask = P.Reshape()(self.mask, (-1,)) | mask = P.Reshape()(self.mask, (-1,)) | ||||
| @@ -66,6 +66,9 @@ class GraphConvolution(nn.Cell): | |||||
| self.matmul = P.MatMul() | self.matmul = P.MatMul() | ||||
| def construct(self, adj, input_feature): | def construct(self, adj, input_feature): | ||||
| """ | |||||
| GCN graph convolution layer. | |||||
| """ | |||||
| dropout = input_feature | dropout = input_feature | ||||
| if self.dropout_flag: | if self.dropout_flag: | ||||
| dropout = self.dropout(dropout) | dropout = self.dropout(dropout) | ||||
| @@ -39,6 +39,7 @@ class Loss(nn.Cell): | |||||
| self.param = param | self.param = param | ||||
| def construct(self, preds): | def construct(self, preds): | ||||
| """Calculate loss""" | |||||
| param = self.l2_loss(self.param) | param = self.l2_loss(self.param) | ||||
| loss = self.weight_decay * param | loss = self.weight_decay * param | ||||
| preds = self.cast(preds, mstype.float32) | preds = self.cast(preds, mstype.float32) | ||||
| @@ -88,6 +88,7 @@ class BertPretrainEva(nn.Cell): | |||||
| def construct(self, input_ids, input_mask, token_type_id, masked_pos, masked_ids, masked_weights, nsp_label): | def construct(self, input_ids, input_mask, token_type_id, masked_pos, masked_ids, masked_weights, nsp_label): | ||||
| """Calculate prediction scores""" | |||||
| bs, _ = self.shape(input_ids) | bs, _ = self.shape(input_ids) | ||||
| probs = self.bert(input_ids, input_mask, token_type_id, masked_pos) | probs = self.bert(input_ids, input_mask, token_type_id, masked_pos) | ||||
| index = self.argmax(probs) | index = self.argmax(probs) | ||||
| @@ -99,7 +99,7 @@ class BertFinetuneCell(nn.Cell): | |||||
| token_type_id, | token_type_id, | ||||
| label_ids, | label_ids, | ||||
| sens=None): | sens=None): | ||||
| """Bert Finetune""" | |||||
| weights = self.weights | weights = self.weights | ||||
| init = False | init = False | ||||
| @@ -195,6 +195,7 @@ class BertSquadCell(nn.Cell): | |||||
| unique_id, | unique_id, | ||||
| is_impossible, | is_impossible, | ||||
| sens=None): | sens=None): | ||||
| """BertSquad""" | |||||
| weights = self.weights | weights = self.weights | ||||
| init = self.alloc_status() | init = self.alloc_status() | ||||
| loss = self.network(input_ids, | loss = self.network(input_ids, | ||||
| @@ -313,6 +314,7 @@ class BertSquad(nn.Cell): | |||||
| self.squeeze = P.Squeeze(axis=-1) | self.squeeze = P.Squeeze(axis=-1) | ||||
| def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): | def construct(self, input_ids, input_mask, token_type_id, start_position, end_position, unique_id, is_impossible): | ||||
| """interface for SQuAD finetuning task""" | |||||
| logits = self.bert(input_ids, input_mask, token_type_id) | logits = self.bert(input_ids, input_mask, token_type_id) | ||||
| if self.is_training: | if self.is_training: | ||||
| unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) | unstacked_logits_0 = self.squeeze(logits[:, :, 0:1]) | ||||
| @@ -103,6 +103,7 @@ class GetMaskedLMOutput(nn.Cell): | |||||
| input_tensor, | input_tensor, | ||||
| output_weights, | output_weights, | ||||
| positions): | positions): | ||||
| """Get output log_probs""" | |||||
| flat_offsets = self.reshape( | flat_offsets = self.reshape( | ||||
| self.rng * self.seq_length_tensor, self.shape_flat_offsets) | self.rng * self.seq_length_tensor, self.shape_flat_offsets) | ||||
| flat_position = self.reshape(positions + flat_offsets, self.last_idx) | flat_position = self.reshape(positions + flat_offsets, self.last_idx) | ||||
| @@ -248,6 +249,7 @@ class BertNetworkWithLoss(nn.Cell): | |||||
| masked_lm_positions, | masked_lm_positions, | ||||
| masked_lm_ids, | masked_lm_ids, | ||||
| masked_lm_weights): | masked_lm_weights): | ||||
| """Get pre-training loss""" | |||||
| prediction_scores, seq_relationship_score = \ | prediction_scores, seq_relationship_score = \ | ||||
| self.bert(input_ids, input_mask, token_type_id, masked_lm_positions) | self.bert(input_ids, input_mask, token_type_id, masked_lm_positions) | ||||
| total_loss = self.loss(prediction_scores, seq_relationship_score, | total_loss = self.loss(prediction_scores, seq_relationship_score, | ||||
| @@ -137,6 +137,7 @@ class EmbeddingLookup(nn.Cell): | |||||
| self.shape = tuple(embedding_shape) | self.shape = tuple(embedding_shape) | ||||
| def construct(self, input_ids): | def construct(self, input_ids): | ||||
| """Get output and embeddings lookup table""" | |||||
| extended_ids = self.expand(input_ids, -1) | extended_ids = self.expand(input_ids, -1) | ||||
| flat_ids = self.reshape(extended_ids, self.shape_flat) | flat_ids = self.reshape(extended_ids, self.shape_flat) | ||||
| if self.use_one_hot_embeddings: | if self.use_one_hot_embeddings: | ||||
| @@ -205,6 +206,7 @@ class EmbeddingPostprocessor(nn.Cell): | |||||
| name='full_position_embeddings') | name='full_position_embeddings') | ||||
| def construct(self, token_type_ids, word_embeddings): | def construct(self, token_type_ids, word_embeddings): | ||||
| """Postprocessors apply positional and token type embeddings to word embeddings.""" | |||||
| output = word_embeddings | output = word_embeddings | ||||
| if self.use_token_type: | if self.use_token_type: | ||||
| flat_ids = self.reshape(token_type_ids, self.shape_flat) | flat_ids = self.reshape(token_type_ids, self.shape_flat) | ||||
| @@ -288,6 +290,7 @@ class RelaPosMatrixGenerator(nn.Cell): | |||||
| self.cast = P.Cast() | self.cast = P.Cast() | ||||
| def construct(self): | def construct(self): | ||||
| """Generates matrix of relative positions between inputs.""" | |||||
| range_vec_row_out = self.cast(F.tuple_to_array(F.make_range(self._length)), mstype.int32) | range_vec_row_out = self.cast(F.tuple_to_array(F.make_range(self._length)), mstype.int32) | ||||
| range_vec_col_out = self.range_mat(range_vec_row_out, (self._length, -1)) | range_vec_col_out = self.range_mat(range_vec_row_out, (self._length, -1)) | ||||
| tile_row_out = self.tile(range_vec_row_out, (self._length,)) | tile_row_out = self.tile(range_vec_row_out, (self._length,)) | ||||
| @@ -342,9 +345,9 @@ class RelaPosEmbeddingsGenerator(nn.Cell): | |||||
| self.matmul = P.BatchMatMul() | self.matmul = P.BatchMatMul() | ||||
| def construct(self): | def construct(self): | ||||
| """Generate embedding for each relative position of dimension depth.""" | |||||
| relative_positions_matrix_out = self.relative_positions_matrix() | relative_positions_matrix_out = self.relative_positions_matrix() | ||||
| # Generate embedding for each relative position of dimension depth. | |||||
| if self.use_one_hot_embeddings: | if self.use_one_hot_embeddings: | ||||
| flat_relative_positions_matrix = self.reshape(relative_positions_matrix_out, (-1,)) | flat_relative_positions_matrix = self.reshape(relative_positions_matrix_out, (-1,)) | ||||
| one_hot_relative_positions_matrix = self.one_hot( | one_hot_relative_positions_matrix = self.one_hot( | ||||
| @@ -495,7 +498,7 @@ class BertAttention(nn.Cell): | |||||
| use_one_hot_embeddings=use_one_hot_embeddings) | use_one_hot_embeddings=use_one_hot_embeddings) | ||||
| def construct(self, from_tensor, to_tensor, attention_mask): | def construct(self, from_tensor, to_tensor, attention_mask): | ||||
| # reshape 2d/3d input tensors to 2d | |||||
| """reshape 2d/3d input tensors to 2d""" | |||||
| from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d) | from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d) | ||||
| to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d) | to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d) | ||||
| query_out = self.query_layer(from_tensor_2d) | query_out = self.query_layer(from_tensor_2d) | ||||
| @@ -784,6 +787,7 @@ class BertTransformer(nn.Cell): | |||||
| self.out_shape = (batch_size, seq_length, hidden_size) | self.out_shape = (batch_size, seq_length, hidden_size) | ||||
| def construct(self, input_tensor, attention_mask): | def construct(self, input_tensor, attention_mask): | ||||
| """Multi-layer bert transformer.""" | |||||
| prev_output = self.reshape(input_tensor, self.shape) | prev_output = self.reshape(input_tensor, self.shape) | ||||
| all_encoder_layers = () | all_encoder_layers = () | ||||
| @@ -915,7 +919,7 @@ class BertModel(nn.Cell): | |||||
| self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) | self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) | ||||
| def construct(self, input_ids, token_type_ids, input_mask): | def construct(self, input_ids, token_type_ids, input_mask): | ||||
| """Bidirectional Encoder Representations from Transformers.""" | |||||
| # embedding | # embedding | ||||
| if not self.token_type_ids_from_dataset: | if not self.token_type_ids_from_dataset: | ||||
| token_type_ids = self.token_type_ids | token_type_ids = self.token_type_ids | ||||
| @@ -110,6 +110,7 @@ class BertNERModel(nn.Cell): | |||||
| self.origin_shape = (config.batch_size, config.seq_length, self.num_labels) | self.origin_shape = (config.batch_size, config.seq_length, self.num_labels) | ||||
| def construct(self, input_ids, input_mask, token_type_id): | def construct(self, input_ids, input_mask, token_type_id): | ||||
| """Return the final logits as the results of log_softmax.""" | |||||
| sequence_output, _, _ = \ | sequence_output, _, _ = \ | ||||
| self.bert(input_ids, token_type_id, input_mask) | self.bert(input_ids, token_type_id, input_mask) | ||||
| seq = self.dropout(sequence_output) | seq = self.dropout(sequence_output) | ||||
| @@ -13,6 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """fused layernorm""" | """fused layernorm""" | ||||
| import numpy as np | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| from mindspore.ops import functional as F | from mindspore.ops import functional as F | ||||
| from mindspore.common.parameter import Parameter | from mindspore.common.parameter import Parameter | ||||
| @@ -21,7 +22,6 @@ from mindspore.ops.primitive import constexpr | |||||
| import mindspore.common.dtype as mstype | import mindspore.common.dtype as mstype | ||||
| from mindspore.nn.cell import Cell | from mindspore.nn.cell import Cell | ||||
| import numpy as np | |||||
| __all__ = ['FusedLayerNorm'] | __all__ = ['FusedLayerNorm'] | ||||
| @@ -101,6 +101,7 @@ class FusedLayerNorm(Cell): | |||||
| self.use_batch_norm = use_batch_norm | self.use_batch_norm = use_batch_norm | ||||
| def construct(self, input_x): | def construct(self, input_x): | ||||
| """Applies Layer Normalization over a mini-batch of inputs""" | |||||
| if self.use_batch_norm and self.training: | if self.use_batch_norm and self.training: | ||||
| ones = P.Fill()(mstype.float32, F.shape(input_x)[:self.begin_norm_axis], 1.0) | ones = P.Fill()(mstype.float32, F.shape(input_x)[:self.begin_norm_axis], 1.0) | ||||
| zeros = P.Fill()(mstype.float32, F.shape(input_x)[:self.begin_norm_axis], 0.0) | zeros = P.Fill()(mstype.float32, F.shape(input_x)[:self.begin_norm_axis], 0.0) | ||||
| @@ -52,6 +52,7 @@ class LayerNorm(nn.Cell): | |||||
| self.get_shape = P.Shape() | self.get_shape = P.Shape() | ||||
| def construct(self, input_tensor): | def construct(self, input_tensor): | ||||
| """layer norm""" | |||||
| shape = self.get_shape(input_tensor) | shape = self.get_shape(input_tensor) | ||||
| batch_size = shape[0] | batch_size = shape[0] | ||||
| max_len = shape[1] | max_len = shape[1] | ||||
| @@ -13,6 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """fused layernorm""" | """fused layernorm""" | ||||
| import numpy as np | |||||
| from mindspore.ops import operations as P | from mindspore.ops import operations as P | ||||
| from mindspore.ops import functional as F | from mindspore.ops import functional as F | ||||
| from mindspore.common.parameter import Parameter | from mindspore.common.parameter import Parameter | ||||
| @@ -21,7 +22,6 @@ from mindspore.ops.primitive import constexpr | |||||
| import mindspore.common.dtype as mstype | import mindspore.common.dtype as mstype | ||||
| from mindspore.nn.cell import Cell | from mindspore.nn.cell import Cell | ||||
| import numpy as np | |||||
| __all__ = ['FusedLayerNorm'] | __all__ = ['FusedLayerNorm'] | ||||
| @@ -241,6 +241,7 @@ class BeamSearchDecoder(nn.Cell): | |||||
| return cur_input_ids, state_log_probs, state_seq, state_finished, state_length | return cur_input_ids, state_log_probs, state_seq, state_finished, state_length | ||||
| def construct(self, enc_states, enc_attention_mask): | def construct(self, enc_states, enc_attention_mask): | ||||
| """Get beam search result.""" | |||||
| cur_input_ids = self.start_ids | cur_input_ids = self.start_ids | ||||
| # beam search states | # beam search states | ||||
| state_log_probs = self.init_scores | state_log_probs = self.init_scores | ||||
| @@ -55,7 +55,7 @@ class ClipGradients(nn.Cell): | |||||
| grads, | grads, | ||||
| clip_type, | clip_type, | ||||
| clip_value): | clip_value): | ||||
| # return grads | |||||
| """return grads""" | |||||
| if clip_type != 0 and clip_type != 1: | if clip_type != 0 and clip_type != 1: | ||||
| return grads | return grads | ||||
| @@ -131,6 +131,7 @@ class EmbeddingLookup(nn.Cell): | |||||
| self.shape = P.Shape() | self.shape = P.Shape() | ||||
| def construct(self, input_ids): | def construct(self, input_ids): | ||||
| """Get a embeddings lookup table with a fixed dictionary and size.""" | |||||
| input_shape = self.shape(input_ids) | input_shape = self.shape(input_ids) | ||||
| flat_ids = self.reshape(input_ids, self.shape_flat) | flat_ids = self.reshape(input_ids, self.shape_flat) | ||||
| @@ -200,6 +201,7 @@ class EmbeddingPostprocessor(nn.Cell): | |||||
| self.shape = P.Shape() | self.shape = P.Shape() | ||||
| def construct(self, word_embeddings): | def construct(self, word_embeddings): | ||||
| """Postprocessors apply positional embeddings to word embeddings.""" | |||||
| input_shape = self.shape(word_embeddings) | input_shape = self.shape(word_embeddings) | ||||
| input_len = input_shape[1] | input_len = input_shape[1] | ||||
| @@ -377,7 +379,7 @@ class MultiheadAttention(nn.Cell): | |||||
| self.softmax_cast = P.Cast() | self.softmax_cast = P.Cast() | ||||
| def construct(self, from_tensor, to_tensor, attention_mask=None): | def construct(self, from_tensor, to_tensor, attention_mask=None): | ||||
| # reshape 2d/3d input tensors to 2d | |||||
| """reshape 2d/3d input tensors to 2d""" | |||||
| from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d) | from_tensor_2d = self.reshape(from_tensor, self.shape_from_2d) | ||||
| to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d) | to_tensor_2d = self.reshape(to_tensor, self.shape_to_2d) | ||||
| query_out = self.query_layer(from_tensor_2d) | query_out = self.query_layer(from_tensor_2d) | ||||
| @@ -476,6 +478,7 @@ class SelfAttention(nn.Cell): | |||||
| self.reshape = P.Reshape() | self.reshape = P.Reshape() | ||||
| self.shape = (-1, hidden_size) | self.shape = (-1, hidden_size) | ||||
| def construct(self, input_tensor, memory_tensor, attention_mask): | def construct(self, input_tensor, memory_tensor, attention_mask): | ||||
| """Apply self-attention.""" | |||||
| input_tensor = self.reshape(input_tensor, self.shape) | input_tensor = self.reshape(input_tensor, self.shape) | ||||
| memory_tensor = self.reshape(memory_tensor, self.shape) | memory_tensor = self.reshape(memory_tensor, self.shape) | ||||
| @@ -831,6 +834,7 @@ class CreateAttentionMaskFromInputMask(nn.Cell): | |||||
| self.batch_matmul = P.BatchMatMul() | self.batch_matmul = P.BatchMatMul() | ||||
| def construct(self, input_mask): | def construct(self, input_mask): | ||||
| """Create attention mask according to input mask.""" | |||||
| input_shape = self.shape(input_mask) | input_shape = self.shape(input_mask) | ||||
| shape_right = (input_shape[0], 1, input_shape[1]) | shape_right = (input_shape[0], 1, input_shape[1]) | ||||
| shape_left = input_shape + (1,) | shape_left = input_shape + (1,) | ||||
| @@ -876,6 +880,7 @@ class PredLogProbs(nn.Cell): | |||||
| def construct(self, | def construct(self, | ||||
| input_tensor, | input_tensor, | ||||
| output_weights): | output_weights): | ||||
| """Get log probs.""" | |||||
| input_tensor = self.reshape(input_tensor, self.shape_flat_sequence_tensor) | input_tensor = self.reshape(input_tensor, self.shape_flat_sequence_tensor) | ||||
| input_tensor = self.cast(input_tensor, self.compute_type) | input_tensor = self.cast(input_tensor, self.compute_type) | ||||
| output_weights = self.cast(output_weights, self.compute_type) | output_weights = self.cast(output_weights, self.compute_type) | ||||
| @@ -962,7 +967,10 @@ class TransformerDecoderStep(nn.Cell): | |||||
| self.cast_compute_type = CastWrapper(dst_type=compute_type) | self.cast_compute_type = CastWrapper(dst_type=compute_type) | ||||
| def construct(self, input_ids, enc_states, enc_attention_mask): | def construct(self, input_ids, enc_states, enc_attention_mask): | ||||
| # input_ids: [batch_size * beam_width] | |||||
| """ | |||||
| Multi-layer transformer decoder step. | |||||
| input_ids: [batch_size * beam_width] | |||||
| """ | |||||
| # process embedding | # process embedding | ||||
| input_embedding, embedding_tables = self.tfm_embedding_lookup(input_ids) | input_embedding, embedding_tables = self.tfm_embedding_lookup(input_ids) | ||||
| input_embedding = self.tfm_embedding_processor(input_embedding) | input_embedding = self.tfm_embedding_processor(input_embedding) | ||||
| @@ -1122,6 +1130,7 @@ class TransformerModel(nn.Cell): | |||||
| self.encdec_mask = Tensor(ones, dtype=mstype.float32) | self.encdec_mask = Tensor(ones, dtype=mstype.float32) | ||||
| def construct(self, source_ids, source_mask, target_ids=None, target_mask=None): | def construct(self, source_ids, source_mask, target_ids=None, target_mask=None): | ||||
| """Transformer with encoder and decoder.""" | |||||
| # process source sentence | # process source sentence | ||||
| src_word_embeddings, embedding_tables = self.tfm_embedding_lookup(source_ids) | src_word_embeddings, embedding_tables = self.tfm_embedding_lookup(source_ids) | ||||
| src_embedding_output = self.tfm_embedding_postprocessor_for_encoder(src_word_embeddings) | src_embedding_output = self.tfm_embedding_postprocessor_for_encoder(src_word_embeddings) | ||||
| @@ -69,6 +69,7 @@ class LossCallBack(Callback): | |||||
| time_stamp_init = True | time_stamp_init = True | ||||
| def step_end(self, run_context): | def step_end(self, run_context): | ||||
| """Monitor the loss in training.""" | |||||
| global time_stamp_first | global time_stamp_first | ||||
| time_stamp_current = get_ms_timestamp() | time_stamp_current = get_ms_timestamp() | ||||
| cb_params = run_context.original_args() | cb_params = run_context.original_args() | ||||
| @@ -68,6 +68,7 @@ class LossCallBack(Callback): | |||||
| self._per_print_times = per_print_times | self._per_print_times = per_print_times | ||||
| def step_end(self, run_context): | def step_end(self, run_context): | ||||
| """Monitor the loss in training.""" | |||||
| cb_params = run_context.original_args() | cb_params = run_context.original_args() | ||||
| loss = cb_params.net_outputs.asnumpy() | loss = cb_params.net_outputs.asnumpy() | ||||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | ||||
| @@ -19,8 +19,8 @@ import os | |||||
| import math | import math | ||||
| from enum import Enum | from enum import Enum | ||||
| import pandas as pd | |||||
| import numpy as np | import numpy as np | ||||
| import pandas as pd | |||||
| import mindspore.dataset.engine as de | import mindspore.dataset.engine as de | ||||
| import mindspore.common.dtype as mstype | import mindspore.common.dtype as mstype | ||||
| @@ -147,6 +147,7 @@ class DenseLayer(nn.Cell): | |||||
| return act_func | return act_func | ||||
| def construct(self, x): | def construct(self, x): | ||||
| """Dense Layer for Deep Layer of DeepFM Model.""" | |||||
| x = self.act_func(x) | x = self.act_func(x) | ||||
| if self.training: | if self.training: | ||||
| x = self.dropout(x) | x = self.dropout(x) | ||||
| @@ -47,6 +47,7 @@ class LossCallBack(Callback): | |||||
| self.config = config | self.config = config | ||||
| def step_end(self, run_context): | def step_end(self, run_context): | ||||
| """Monitor the loss in training.""" | |||||
| cb_params = run_context.original_args() | cb_params = run_context.original_args() | ||||
| wide_loss, deep_loss = cb_params.net_outputs[0].asnumpy(), cb_params.net_outputs[1].asnumpy() | wide_loss, deep_loss = cb_params.net_outputs[0].asnumpy(), cb_params.net_outputs[1].asnumpy() | ||||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | ||||
| @@ -13,6 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """wide and deep model""" | """wide and deep model""" | ||||
| import numpy as np | |||||
| from mindspore import nn | from mindspore import nn | ||||
| from mindspore import Parameter, ParameterTuple | from mindspore import Parameter, ParameterTuple | ||||
| import mindspore.common.dtype as mstype | import mindspore.common.dtype as mstype | ||||
| @@ -28,7 +29,6 @@ from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_ | |||||
| from mindspore.train.parallel_utils import ParallelMode | from mindspore.train.parallel_utils import ParallelMode | ||||
| from mindspore.nn.wrap.grad_reducer import DistributedGradReducer | from mindspore.nn.wrap.grad_reducer import DistributedGradReducer | ||||
| from mindspore.communication.management import get_group_size | from mindspore.communication.management import get_group_size | ||||
| import numpy as np | |||||
| np_type = np.float32 | np_type = np.float32 | ||||
| ms_type = mstype.float32 | ms_type = mstype.float32 | ||||