
add some TBE optimizer ops for vm

tags/v0.5.0-beta
liuxiao 5 years ago
commit cda547e92f
9 changed files with 612 additions and 0 deletions
  1. +4 -0  mindspore/ccsrc/kernel/tbe/tbe_adapter.cc
  2. +4 -0  mindspore/ops/_op_impl/tbe/__init__.py
  3. +68 -0  mindspore/ops/_op_impl/tbe/apply_ada_max.py
  4. +66 -0  mindspore/ops/_op_impl/tbe/apply_adadelta.py
  5. +55 -0  mindspore/ops/_op_impl/tbe/apply_adagrad.py
  6. +56 -0  mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py
  7. +5 -0  mindspore/ops/operations/__init__.py
  8. +277 -0  mindspore/ops/operations/nn_ops.py
  9. +77 -0  tests/ut/python/ops/test_ops.py

+4 -0  mindspore/ccsrc/kernel/tbe/tbe_adapter.cc

@@ -69,6 +69,10 @@ static std::map<string, string> tbe_func_adapter_map = {
{"strided_slice", "strided_slice_d"}, {"strided_slice", "strided_slice_d"},
{"strided_slice_grad", "strided_slice_grad_d"}, {"strided_slice_grad", "strided_slice_grad_d"},
{"sparse_apply_ftrl", "sparse_apply_ftrl_d"}, {"sparse_apply_ftrl", "sparse_apply_ftrl_d"},
{"apply_ada_max", "apply_ada_max_d"},
{"apply_adadelta", "apply_adadelta_d"},
{"apply_adagrad", "apply_adagrad_d"},
{"apply_adagrad_v2", "apply_adagradv2_d"},
{"transpose", "transpose_d"}, {"transpose", "transpose_d"},
{"fill", "fill_d"}, {"fill", "fill_d"},
{"unsorted_segment_sum", "unsorted_segment_sum_d"}, {"unsorted_segment_sum", "unsorted_segment_sum_d"},


+4 -0  mindspore/ops/_op_impl/tbe/__init__.py

@@ -27,6 +27,10 @@ from .add_n import _add_n_tbe
from .apply_ftrl import _apply_ftrl_tbe
from .apply_momentum import _apply_momentum_tbe
from .apply_adam import _apply_adam_tbe
from .apply_ada_max import _apply_ada_max_tbe
from .apply_adadelta import _apply_adadelta_tbe
from .apply_adagrad import _apply_adagrad_tbe
from .apply_adagrad_v2 import _apply_adagrad_v2_tbe
from .adam_apply_one import _adam_apply_one_tbe
from .assign import _assign_tbe
from .assign_add import _assign_add_tbe


+68 -0  mindspore/ops/_op_impl/tbe/apply_ada_max.py

@@ -0,0 +1,68 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""ApplyAdaMaxD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_ada_max_d_op_info = TBERegOp("ApplyAdaMax") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_ada_max_d.so") \
.compute_cost(10) \
.kernel_name("apply_ada_max_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "m", False, "required", "all") \
.input(2, "v", False, "required", "all") \
.input(3, "beta1_power", False, "required", "all") \
.input(4, "lr", False, "required", "all") \
.input(5, "beta1", False, "required", "all") \
.input(6, "beta2", False, "required", "all") \
.input(7, "epsilon", False, "required", "all") \
.input(8, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "m", False, "required", "all") \
.output(2, "v", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_ada_max_d_op_info)
def _apply_ada_max_tbe():
"""ApplyAdaMaxD TBE register"""
return

+66 -0  mindspore/ops/_op_impl/tbe/apply_adadelta.py

@@ -0,0 +1,66 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""ApplyAdadeltaD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_adadelta_d_op_info = TBERegOp("ApplyAdadelta") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adadelta_d.so") \
.compute_cost(10) \
.kernel_name("apply_adadelta_d") \
.partial_flag(True) \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "accum_update", False, "required", "all") \
.input(3, "lr", False, "required", "all") \
.input(4, "rho", False, "required", "all") \
.input(5, "epsilon", False, "required", "all") \
.input(6, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.output(2, "accum_update", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_5HD, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_FracZ, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_5HD, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_FracZ, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_adadelta_d_op_info)
def _apply_adadelta_tbe():
"""ApplyAdadeltaD TBE register"""
return

+55 -0  mindspore/ops/_op_impl/tbe/apply_adagrad.py

@@ -0,0 +1,55 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""ApplyAdagradD op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_adagrad_d_op_info = TBERegOp("ApplyAdagrad") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adagrad_d.so") \
.compute_cost(10) \
.kernel_name("apply_adagrad_d") \
.partial_flag(True) \
.attr("update_slots", "optional", "bool", "true,false", "false") \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_adagrad_d_op_info)
def _apply_adagrad_tbe():
"""ApplyAdagradD TBE register"""
return

+56 -0  mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py

@@ -0,0 +1,56 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""ApplyAdagradV2D op"""
from mindspore.ops.op_info_register import op_info_register, TBERegOp, DataType

apply_adagrad_v2_d_op_info = TBERegOp("ApplyAdagradV2") \
.fusion_type("OPAQUE") \
.async_flag(False) \
.binfile_name("apply_adagradv2_d.so") \
.compute_cost(10) \
.kernel_name("apply_adagradv2_d") \
.partial_flag(True) \
.attr("epsilon", "required", "float", "all") \
.attr("update_slots", "optional", "bool", "true,false", "false") \
.input(0, "var", False, "required", "all") \
.input(1, "accum", False, "required", "all") \
.input(2, "lr", False, "required", "all") \
.input(3, "grad", False, "required", "all") \
.output(0, "var", False, "required", "all") \
.output(1, "accum", False, "required", "all") \
.dtype_format(DataType.F16_5HD, DataType.F16_5HD, DataType.F16_Default, DataType.F16_5HD,
DataType.F16_5HD, DataType.F16_5HD) \
.dtype_format(DataType.F16_FracZ, DataType.F16_FracZ, DataType.F16_Default, DataType.F16_FracZ,
DataType.F16_FracZ, DataType.F16_FracZ) \
.dtype_format(DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0, DataType.F16_Default, DataType.F16_C1HWNCoC0,
DataType.F16_C1HWNCoC0, DataType.F16_C1HWNCoC0) \
.dtype_format(DataType.F16_Default, DataType.F16_Default, DataType.F16_Default, DataType.F16_Default,
DataType.F16_Default, DataType.F16_Default) \
.dtype_format(DataType.F32_5HD, DataType.F32_5HD, DataType.F32_Default, DataType.F32_5HD,
DataType.F32_5HD, DataType.F32_5HD) \
.dtype_format(DataType.F32_FracZ, DataType.F32_FracZ, DataType.F32_Default, DataType.F32_FracZ,
DataType.F32_FracZ, DataType.F32_FracZ) \
.dtype_format(DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0, DataType.F32_Default, DataType.F32_C1HWNCoC0,
DataType.F32_C1HWNCoC0, DataType.F32_C1HWNCoC0) \
.dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
DataType.F32_Default, DataType.F32_Default) \
.get_op_info()


@op_info_register(apply_adagrad_v2_d_op_info)
def _apply_adagrad_v2_tbe():
"""ApplyAdagradV2D TBE register"""
return

+5 -0  mindspore/ops/operations/__init__.py

@@ -72,6 +72,7 @@ from .nn_ops import (LSTM, SGD, Adam, ApplyMomentum, BatchNorm,
SparseSoftmaxCrossEntropyWithLogits, Tanh,
TopK, BinaryCrossEntropy, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
ApplyProximalAdagrad, SparseApplyProximalAdagrad,
ApplyAdaMax, ApplyAdadelta, ApplyAdagrad, ApplyAdagradV2,
ApplyRMSProp, ApplyCenteredRMSProp, BasicLSTMCell)
from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
CheckValid, MakeRefKey, Partial, Depend, CheckBprop, ConfusionMatrix)
@@ -278,6 +279,10 @@ __all__ = [
"SparseApplyFtrl", "SparseApplyFtrl",
"ApplyProximalAdagrad", "ApplyProximalAdagrad",
"SparseApplyProximalAdagrad", "SparseApplyProximalAdagrad",
"ApplyAdaMax",
"ApplyAdadelta",
"ApplyAdagrad",
"ApplyAdagradV2",
"BatchToSpace", "BatchToSpace",
"Atan2", "Atan2",
"ApplyRMSProp", "ApplyRMSProp",


+277 -0  mindspore/ops/operations/nn_ops.py

@@ -2769,6 +2769,283 @@ class BinaryCrossEntropy(PrimitiveWithInfer):
return x_type




class ApplyAdaMax(PrimitiveWithInfer):
r"""
Update relevant entries according to the adamax scheme.

The updating formulas are as follows,

.. math::
\begin{array}{ll} \\
m_{t} = \beta_1 * m_{t-1} + (1 - \beta_1) * g \\
v_{t} = \max(\beta_2 * v_{t-1}, \left| g \right|) \\
var = var - \frac{l}{1 - \beta_1^t} * \frac{m_{t}}{v_{t} + \epsilon}
\end{array}

:math:`t` represents the updating step, :math:`m` represents the 1st moment vector and :math:`m_{t-1}`
is its value at the previous step, :math:`v` represents the 2nd moment vector and :math:`v_{t-1}`
is its value at the previous step, :math:`l` represents the scaling factor `lr`,
:math:`g` represents `grad`, :math:`\beta_1, \beta_2` represent `beta1` and `beta2`,
:math:`\beta_1^t` represents `beta1_power`, :math:`var` represents the variable to be updated, and
:math:`\epsilon` represents `epsilon`.

Inputs:
- **var** (Parameter) - Variable to be updated.
- **m** (Parameter) - The 1st moment vector in the updating formula. Has the same shape and type as `var`.
- **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients,
has the same shape and type as `var`.
- **beta1_power** (float) - :math:`\beta_1^t` in the updating formula.
- **lr** (float) - Learning rate, :math:`l` in the updating formula. Has the same type as `var`.
- **beta1** (float) - The exponential decay rate for the 1st moment estimates.
- **beta2** (float) - The exponential decay rate for the 2nd moment estimates.
- **epsilon** (float) - A small value added for numerical stability.
- **grad** (Tensor) - A tensor for gradient. Has the same shape and type as `var`.

Outputs:
Tuple of 3 Tensor, the updated parameters.

- **var** (Tensor) - The same shape and data type as `var`.
- **m** (Tensor) - The same shape and data type as `m`.
- **v** (Tensor) - The same shape and data type as `v`.

Examples:
>>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
>>> m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
>>> v = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="v")
>>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
>>> beta1_power = 0.9
>>> lr = 0.001
>>> beta1 = 0.9
>>> beta2 = 0.99
>>> epsilon = 1e-10
>>> apply_ada_max = P.ApplyAdaMax()
>>> output = apply_ada_max(var, m, v, beta1_power, lr, beta1, beta2, epsilon, grad)
"""

__mindspore_signature__ = (
('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('m', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('v', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('beta1_power', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE,
sig_dtype.T),
('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('beta1', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('beta2', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T)
)

@prim_attr_register
def __init__(self):
"""init ApplyAdaMax"""

def infer_shape(self, var_shape, m_shape, v_shape, beta1_power_shape, lr_shape,
beta1_shape, beta2_shape, epsilon_shape, grad_shape):
validator.check("var_shape", var_shape, "m_shape", m_shape, Rel.EQ, self.name)
validator.check("var_shape", var_shape, "v_shape", v_shape, Rel.EQ, self.name)
validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
return var_shape, m_shape, v_shape

def infer_dtype(self, var_dtype, m_dtype, v_dtype, beta1_power_dtype, lr_dtype,
beta1_dtype, beta2_dtype, epsilon_dtype, grad_dtype):
args = {"var": var_dtype, "m": m_dtype, "v": v_dtype, "grad": grad_dtype}
validator.check_tensor_type_same(args, mstype.number_type, self.name)

scalar_args = {"beta1_power": beta1_power_dtype, 'lr': lr_dtype, "beta1": beta1_dtype,
"beta2": beta2_dtype, "epsilon": epsilon_dtype}
validator.check_scalar_or_tensor_type_same(scalar_args, [mstype.float16, mstype.float32], self.name, True)
return var_dtype, m_dtype, v_dtype
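
As a reading aid only (not part of this diff), here is a minimal NumPy sketch of the AdaMax update the docstring above describes; the function name and default values are illustrative:

import numpy as np

def ada_max_step(var, m, v, grad, beta1_power, lr=0.001,
                 beta1=0.9, beta2=0.99, epsilon=1e-10):
    """One AdaMax step; returns the updated (var, m, v)."""
    m = beta1 * m + (1 - beta1) * grad        # 1st moment estimate
    v = np.maximum(beta2 * v, np.abs(grad))   # infinity-norm based 2nd moment
    var = var - (lr / (1 - beta1_power)) * m / (v + epsilon)
    return var, m, v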


class ApplyAdadelta(PrimitiveWithInfer):
r"""
Update relevant entries according to the adadelta scheme.

.. math::
accum = \rho * accum + (1 - \rho) * grad^2
.. math::
update = \sqrt{accum_update + \epsilon} * \frac{grad}{\sqrt{accum + \epsilon}}
.. math::
accum_update = \rho * accum_update + (1 - \rho) * update^2
.. math::
var -= lr * update

Inputs:
- **var** (Parameter) - Weights to be updated.
- **accum** (Parameter) - Accum to be updated, has the same shape and type as `var`.
- **accum_update** (Parameter) - Accum_update to be updated, has the same shape and type as `var`.
- **lr** (float) - Learning rate, has the same type as `var`.
- **rho** (float) - Decay rate.
- **epsilon** (float) - A small value added for numerical stability.
- **grad** (Tensor) - Gradients, has the same shape and type as `var`.

Outputs:
Tuple of 3 Tensor, the updated parameters.

- **var** (Tensor) - The same shape and data type as `var`.
- **accum** (Tensor) - The same shape and data type as `accum`.
- **accum_update** (Tensor) - The same shape and data type as `accum_update`.

Examples:
>>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
>>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
>>> accum_update = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum_update")
>>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
>>> lr = 0.001
>>> rho = 0.0
>>> epsilon = 1e-6
>>> apply_adadelta = P.ApplyAdadelta()
>>> output = apply_adadelta(var, accum, accum_update, lr, rho, epsilon, grad)
"""

__mindspore_signature__ = (
('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('accum_update', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE,
sig_dtype.T),
('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('rho', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('epsilon', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T)
)

@prim_attr_register
def __init__(self):
"""init ApplyAdadelta"""

def infer_shape(self, var_shape, accum_shape, accum_update_shape, lr_shape, rho_shape,
epsilon_shape, grad_shape):
validator.check("var_shape", var_shape, "accum_shape", accum_shape, Rel.EQ, self.name)
validator.check("var_shape", var_shape, "accum_update_shape", accum_update_shape, Rel.EQ, self.name)
validator.check("var_shape", var_shape, "grad_shape", grad_shape, Rel.EQ, self.name)
return var_shape, accum_shape, accum_update_shape

def infer_dtype(self, var_dtype, accum_dtype, accum_update_dtype, lr_dtype, rho_dtype,
epsilon_dtype, grad_dtype):
args = {"var": var_dtype, "accum": accum_dtype, "accum_update": accum_update_dtype, "grad": grad_dtype}
validator.check_tensor_type_same(args, mstype.number_type, self.name)

scalar_args = {"lr": lr_dtype, "rho": rho_shape, "epsilon": epsilon_dtype}
validator.check_scalar_or_tensor_type_same(scalar_args, [mstype.float16, mstype.float32], self.name, True)
return var_dtype, accum_dtype, accum_update_dtype
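
For reference only (not part of this diff), a minimal NumPy sketch of the Adadelta update described above; names and default values are illustrative:

import numpy as np

def adadelta_step(var, accum, accum_update, grad, lr=0.001, rho=0.95, epsilon=1e-6):
    """One Adadelta step; returns the updated (var, accum, accum_update)."""
    accum = rho * accum + (1 - rho) * grad ** 2
    update = np.sqrt(accum_update + epsilon) / np.sqrt(accum + epsilon) * grad
    accum_update = rho * accum_update + (1 - rho) * update ** 2
    var = var - lr * update
    return var, accum, accum_update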


class ApplyAdagrad(PrimitiveWithInfer):
r"""
Update relevant entries according to the adagrad scheme.

.. math::
accum += grad * grad
.. math::
var -= lr * grad * \frac{1}{\sqrt{accum}}

Args:
update_slots (bool): If `True`, `accum` will be updated. Default: True.

Inputs:
- **var** (Parameter) - Variable to be updated.
- **accum** (Parameter) - Accum to be updated. The shape and dtype should be the same as `var`.
- **lr** (float) - The learning rate value, has the same type as `var`.
- **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`.

Outputs:
Tuple of 2 Tensor, the updated parameters.

- **var** (Tensor) - The same shape and data type as `var`.
- **accum** (Tensor) - The same shape and data type as `accum`.

Examples:
>>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
>>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
>>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
>>> lr = 0.01
>>> apply_adagrad = P.ApplyAdagrad()
>>> output = apply_adagrad(var, accum, lr, grad)
"""

__mindspore_signature__ = (
('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T)
)

@prim_attr_register
def __init__(self, update_slots=True):
validator.check_value_type("update_slots", update_slots, [bool], self.name)

def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape):
validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name)
return var_shape, accum_shape

def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype):
args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
validator.check_tensor_type_same(args, mstype.number_type, self.name)
valid_types = [mstype.float16, mstype.float32]
validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, valid_types, self.name)
return var_dtype, accum_dtype
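
For reference only (not part of this diff), a minimal NumPy sketch of the Adagrad update described above, including the `update_slots` switch; names are illustrative:

import numpy as np

def adagrad_step(var, accum, grad, lr=0.001, update_slots=True):
    """One Adagrad step; returns the updated (var, accum)."""
    if update_slots:
        accum = accum + grad ** 2     # accumulate squared gradients
    var = var - lr * grad / np.sqrt(accum)
    return var, accum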


class ApplyAdagradV2(PrimitiveWithInfer):
r"""
Update relevant entries according to the adagradv2 scheme.

.. math::
accum += grad * grad
.. math::
var -= lr * grad * \frac{1}{\sqrt{accum} + \epsilon}

Args:
epsilon (float): A small value added for numerical stability.
update_slots (bool): If `True`, `accum` will be updated. Default: True.

Inputs:
- **var** (Parameter) - Variable to be updated.
- **accum** (Parameter) - Accum to be updated. The shape and dtype should be the same as `var`.
- **lr** (float) - The learning rate value, has the same type as `var`.
- **grad** (Tensor) - A tensor for gradient. The shape and dtype should be the same as `var`.

Outputs:
Tuple of 2 Tensor, the updated parameters.

- **var** (Tensor) - The same shape and data type as `var`.
- **accum** (Tensor) - The same shape and data type as `accum`.

Examples:
>>> var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
>>> accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
>>> grad = Tensor(np.random.rand(3, 3).astype(np.float32))
>>> lr = 0.01
>>> apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
>>> output = apply_adagrad_v2(var, accum, lr, grad)
"""

__mindspore_signature__ = (
('var', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('accum', sig_rw.RW_WRITE, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('lr', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T),
('grad', sig_rw.RW_READ, sig_kind.KIND_POSITIONAL_KEYWORD, sig_kind.KIND_EMPTY_DEFAULT_VALUE, sig_dtype.T)
)

@prim_attr_register
def __init__(self, epsilon, update_slots=True):
validator.check_value_type("epsilon", epsilon, [float], self.name)
validator.check_value_type("update_slots", update_slots, [bool], self.name)

def infer_shape(self, var_shape, accum_shape, lr_shape, grad_shape):
validator.check('var shape', var_shape, 'accum shape', accum_shape, Rel.EQ, self.name)
validator.check('var shape', var_shape, 'grad shape', grad_shape, Rel.EQ, self.name)
return var_shape, accum_shape

def infer_dtype(self, var_dtype, accum_dtype, lr_dtype, grad_dtype):
args = {'var': var_dtype, 'accum': accum_dtype, 'grad': grad_dtype}
validator.check_tensor_type_same(args, mstype.number_type, self.name)
valid_types = [mstype.float16, mstype.float32]
validator.check_scalar_or_tensor_type_same({'lr': lr_dtype}, valid_types, self.name)
return var_dtype, accum_dtype
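
For reference only (not part of this diff), a minimal NumPy sketch of the AdagradV2 update described above; it differs from Adagrad only by the epsilon added to the denominator, and names are illustrative:

import numpy as np

def adagrad_v2_step(var, accum, grad, lr=0.001, epsilon=1e-6, update_slots=True):
    """One AdagradV2 step; returns the updated (var, accum)."""
    if update_slots:
        accum = accum + grad ** 2     # accumulate squared gradients
    var = var - lr * grad / (np.sqrt(accum) + epsilon)
    return var, accum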


class SparseApplyAdagrad(PrimitiveWithInfer):
r"""
Update relevant entries according to the adagrad scheme.


+77 -0  tests/ut/python/ops/test_ops.py

@@ -270,6 +270,67 @@ class ApplyProximalAdagradNet(nn.Cell):
return out




class ApplyAdaMaxNet(nn.Cell):
def __init__(self):
super(ApplyAdaMaxNet, self).__init__()
self.apply_ada_max = P.ApplyAdaMax()
self.beta1_power = 0.9
self.lr = 0.001
self.beta1 = 0.9
self.beta2 = 0.99
self.epsilon = 1e-10
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.m = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="m")
self.v = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="v")

def construct(self, grad):
out = self.apply_ada_max(self.var, self.m, self.v, self.beta1_power, self.lr,
self.beta1, self.beta2, self.epsilon, grad)
return out


class ApplyAdadeltaNet(nn.Cell):
def __init__(self):
super(ApplyAdadeltaNet, self).__init__()
self.apply_adadelta = P.ApplyAdadelta()
self.lr = 0.001
self.rho = 0.0
self.epsilon = 1e-6
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")
self.accum_update = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum_update")

def construct(self, grad):
out = self.apply_adadelta(self.var, self.accum, self.accum_update, self.lr, self.rho, self.epsilon, grad)
return out


class ApplyAdagradNet(nn.Cell):
def __init__(self):
super(ApplyAdagradNet, self).__init__()
self.apply_adagrad = P.ApplyAdagrad()
self.lr = 0.001
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")

def construct(self, grad):
out = self.apply_adagrad(self.var, self.accum, self.lr, grad)
return out


class ApplyAdagradV2Net(nn.Cell):
def __init__(self):
super(ApplyAdagradV2Net, self).__init__()
self.apply_adagrad_v2 = P.ApplyAdagradV2(epsilon=1e-6)
self.lr = 0.001
self.var = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="var")
self.accum = Parameter(Tensor(np.random.rand(3, 3).astype(np.float32)), name="accum")

def construct(self, grad):
out = self.apply_adagrad_v2(self.var, self.accum, self.lr, grad)
return out


class ApplyRMSNet(nn.Cell):
def __init__(self):
super(ApplyRMSNet, self).__init__()
@@ -1072,6 +1133,22 @@ test_case_nn_ops = [
'block': SparseApplyProximalAdagradNet(),
'desc_inputs': [[3, 3], Tensor(np.ones((3,), np.int32))],
'skip': ['backward']}),
('ApplyAdaMax', {
'block': ApplyAdaMaxNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdadelta', {
'block': ApplyAdadeltaNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagrad', {
'block': ApplyAdagradNet(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('ApplyAdagradV2', {
'block': ApplyAdagradV2Net(),
'desc_inputs': [[3, 3]],
'skip': ['backward']}),
('Flatten_1', {
'block': NetForFlatten(),
'desc_inputs': [Tensor(np.ones([2, 3, 4]).astype(np.int32)), Tensor(np.ones([2, 12]).astype(np.int32))],

