@@ -14,7 +14,6 @@
 # ============================================================================
 """THOR"""
 from mindspore.ops import functional as F, composite as C, operations as P
-from mindspore.ops import _selected_ops
 from mindspore.common.initializer import initializer
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.common.tensor import Tensor
@@ -62,7 +61,7 @@ class THOR_GPU(Optimizer):
         self.use_nesterov = check_bool(use_nesterov)
         self.moments = self.params.clone(prefix="moments", init='zeros')
         self.hyper_map = C.HyperMap()
-        self.opt = _selected_ops.ApplyMomentum(use_nesterov=self.use_nesterov)
+        self.opt = P.ApplyMomentum(use_nesterov=self.use_nesterov)
         self.feature_map = [1.0 / 12544, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
                             1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136, 1.0 / 3136,
@@ -27,7 +27,6 @@ from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.context import ParallelMode
 from mindspore.communication.management import get_group_size
 from mindspore import context
-from mindspore.ops import _selected_ops
 from .bert_model import BertModel
 from .utils import ClipByGlobalNorm
@@ -132,7 +131,7 @@ class GetNextSentenceOutput(nn.Cell):
     """
     def __init__(self, config):
         super(GetNextSentenceOutput, self).__init__()
-        self.log_softmax = _selected_ops.LogSoftmax()
+        self.log_softmax = P.LogSoftmax()
         weight_init = TruncatedNormal(config.initializer_range)
         self.dense = nn.Dense(config.hidden_size, 2,
                               weight_init=weight_init, has_bias=True).to_float(config.compute_type)
@@ -23,7 +23,6 @@ from mindspore.common.parameter import Parameter
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_group_size
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
-from mindspore.ops import _selected_ops
 from mindspore.ops import composite as C
 from mindspore.ops import functional as F
 from mindspore.ops import operations as P
@@ -147,7 +146,7 @@ class GetNextSentenceOutput(nn.Cell):
     def __init__(self, config):
         super(GetNextSentenceOutput, self).__init__()
-        self.log_softmax = _selected_ops.LogSoftmax()
+        self.log_softmax = P.LogSoftmax()
         weight_init = TruncatedNormal(config.initializer_range)
         self.dense = nn.Dense(config.hidden_size, 2,
                               weight_init=weight_init, has_bias=True).to_float(config.compute_type)
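Both BERT hunks above make the identical swap in `GetNextSentenceOutput`. A minimal sketch of the replacement op (illustration only; the sample logits are made up):

```python
# Illustration only: P.LogSoftmax from the public operations namespace,
# replacing _selected_ops.LogSoftmax in both BERT files above.
import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

log_softmax = P.LogSoftmax()  # axis defaults to -1, matching the no-arg call in the diff
logits = Tensor(np.array([[2.0, 1.0, 0.1]], np.float32))
print(log_softmax(logits))    # row-wise log-probabilities; exp() of each row sums to 1
```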