@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CrossEntropy"""
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
@@ -22,6 +22,7 @@ from mindspore.ops import operations as P
 class CrossEntropy(_Loss):
     """CrossEntropy"""
+
     def __init__(self, smooth_factor=0., num_classes=1000):
         super(CrossEntropy, self).__init__()
         self.onehot = P.OneHot()
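For context: with smooth_factor s and num_classes K, the OneHot above is fed on_value = 1 - s and off_value = s / (K - 1), the usual label-smoothing convention. A minimal NumPy sketch of the targets it produces (helper name and shapes are mine, not part of the patch):

    import numpy as np

    def smoothed_one_hot(labels, num_classes=1000, smooth_factor=0.1):
        # mirrors the on_value/off_value pair handed to P.OneHot
        on_value = 1.0 - smooth_factor
        off_value = smooth_factor / (num_classes - 1)
        out = np.full((len(labels), num_classes), off_value, dtype=np.float32)
        out[np.arange(len(labels)), labels] = on_value
        return out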
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
+"""batch_matmul_impl"""
 from mindspore.ops.op_info_register import op_info_register
 @op_info_register("""{
     "op_name": "CusBatchMatMul",
@@ -71,4 +71,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True, kernel_name="batchmatmul"):
+    """CusBatchMatMul"""
     return
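The empty body above is intentional: for custom TBE ops, op_info_register attaches the JSON op description that lets the framework locate the compiled kernel, and the Python function is only a registration stub. A hedged sketch of the pattern (op name and JSON heavily abbreviated, illustrative only):

    from mindspore.ops.op_info_register import op_info_register

    @op_info_register("""{
        "op_name": "MyCusOp",
        "imply_type": "TBE"
    }""")
    def MyCusOp(input_x, output, kernel_name="my_cus_op"):
        """MyCusOp registration stub; the computation lives in the compiled kernel."""
        return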
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
+"""CusCholeskyTrsm"""
 from mindspore.ops.op_info_register import op_info_register
 @op_info_register("""{
     "op_name": "CusCholeskyTrsm",
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusCholeskyTrsm(input_x, output, kernel_name):
+    """CusCholeskyTrsm"""
     return
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusBatchMatMul"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
-# y = x^2
 class CusBatchMatMul(PrimitiveWithInfer):
     """CusBatchMatMul definition"""
@@ -35,4 +34,5 @@ class CusBatchMatMul(PrimitiveWithInfer):
         return data1_shape
     def infer_dtype(self, data1_dtype, data2_dtype):
-        return data1_dtype
+        return data1_dtype
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusCholeskyTrsm"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
@@ -24,11 +25,14 @@ class CusCholeskyTrsm(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['x1'], outputs=['y'])
     def infer_shape(self, data1_shape):
-        m, n = data1_shape
+        ll = []
+        m, _ = data1_shape
         if m >= 128:
-            return [m // 128, 128, 128]
+            ll = [m // 128, 128, 128]
         else:
-            return [1, 64, 64]
+            ll = [1, 64, 64]
+        return ll
     def infer_dtype(self, data1_dtype):
-        return data1_dtype
+        return data1_dtype
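A quick check of the shape rule above: inputs with m >= 128 are split into m // 128 diagonal blocks of 128 x 128, while anything smaller falls back to a single 64 x 64 block. A standalone mirror (the helper is mine):

    def cholesky_trsm_out_shape(m):
        # mirrors CusCholeskyTrsm.infer_shape for an (m, m) input
        return [m // 128, 128, 128] if m >= 128 else [1, 64, 64]

    assert cholesky_trsm_out_shape(256) == [2, 128, 128]
    assert cholesky_trsm_out_shape(64) == [1, 64, 64]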
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusFusedAbsMax1"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -32,10 +33,13 @@ class CusFusedAbsMax1(PrimitiveWithInfer):
         return bprop
     def infer_shape(self, data1_shape):
+        ll = []
         if len(data1_shape) == 2:
-            return [1, ]
+            ll = [1,]
         else:
-            return [32, 64]
+            ll = [32, 64]
+        return ll
     def infer_dtype(self, data1_dtype):
         return data1_dtype
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusImg2Col"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -49,3 +49,4 @@ class CusImg2Col(PrimitiveWithInfer):
     def infer_dtype(self, data1_dtype):
         return data1_dtype
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCube"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -37,11 +38,11 @@ class CusMatMulCube(PrimitiveWithInfer):
     def infer_shape(self, data1_shape, data2_shape):
         # shape = [1, data1_shape[1], data2_shape[2], 16, 16]
         # return shape
-        if self.transpose_a == True:
+        if self.transpose_a:
             k1, m = data1_shape
         else:
             m, k1 = data1_shape
-        if self.transpose_b == True:
+        if self.transpose_b:
             n, k2 = data2_shape
         else:
             k2, n = data2_shape
@@ -51,3 +52,4 @@ class CusMatMulCube(PrimitiveWithInfer):
     def infer_dtype(self, data1_dtype, data2_dtype):
         return ms.common.dtype.tensor_type(getattr(ms, "float32"))
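The transpose handling reduces to ordinary matmul shape inference; a standalone mirror of the branches above (the helper and the assert values are mine):

    def matmul_out_shape(shape_a, shape_b, transpose_a=False, transpose_b=False):
        # pick m, k, n exactly as CusMatMulCube.infer_shape does
        if transpose_a:
            k1, m = shape_a
        else:
            m, k1 = shape_a
        if transpose_b:
            n, k2 = shape_b
        else:
            k2, n = shape_b
        assert k1 == k2, "inner dimensions must agree"
        return [m, n]

    assert matmul_out_shape([32, 64], [64, 16]) == [32, 16]
    assert matmul_out_shape([32, 64], [16, 64], transpose_b=True) == [32, 16]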
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCubeDenseLeft"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCubeFraczRightMul"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatrixCombine"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusFusedAbsMax1"""
 from mindspore.ops.op_info_register import op_info_register
@@ -65,4 +65,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
+    """CusFusedAbsMax1"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusImg2ColNC1HWC0"""
 from mindspore.ops.op_info_register import op_info_register
@@ -83,4 +83,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusImg2ColNC1HWC0(input_x, output, ksizes, strides, dilates, padding, kernel_name="img2col"):
+    """CusImg2ColNC1HWC0"""
     return
@@ -2,11 +2,11 @@
 # -*- coding:utf-8 -*-
 """
 Copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
@@ -14,7 +14,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -98,4 +98,5 @@ NoneType = type(None)
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
                            kernel_name="matmulcube"):
+    """CusMatMulCubeDenseLeft"""
     return
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 Copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -99,4 +99,5 @@ NoneType = type(None)
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
                                kernel_name="CusMatMulCubeFraczLeftCast"):
+    """CusMatMulCubeFraczLeftCast"""
     return
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 Copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 Copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -110,4 +110,5 @@ NoneType = type(None)
 # pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCube(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
+    """CusMatMulCube"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatrixCombine"""
 from mindspore.ops.op_info_register import op_info_register
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusMatrixCombine(input_x, output, kernel_name="matrix_combine"):
+    """CusMatrixCombine"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusTranspose02314"""
 from mindspore.ops.op_info_register import op_info_register
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusTranspose02314(input_x, output, kernel_name="transpose021354"):
+    """CusTranspose02314"""
     return
@@ -19,12 +19,14 @@ import numpy as np
 def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
+    """linear_warmup_lr"""
     lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
     lr = float(init_lr) + lr_inc * current_step
     return lr
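A quick sanity check of the warmup rule (my numbers, rounded): with base_lr=0.1, init_lr=0.01 and 5 warmup steps, the rate climbs by 0.018 per step:

    >>> [round(linear_warmup_lr(s, warmup_steps=5, base_lr=0.1, init_lr=0.01), 3) for s in range(6)]
    [0.01, 0.028, 0.046, 0.064, 0.082, 0.1]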
 def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
+    """cosine_annealing_lr"""
     base_lr = lr
     warmup_init_lr = 0
     total_steps = int(max_epoch * steps_per_epoch)
@@ -44,6 +46,7 @@ def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, et
 def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
+    """warmup_cosine_annealing_lr"""
     base_lr = lr
     warmup_init_lr = 0
     total_steps = int(max_epoch * steps_per_epoch * 0.99)
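Past the warmup phase both schedules follow the standard cosine annealing curve. The remainder of each function lies outside these hunks, so as orientation here is the usual SGDR form they implement (a sketch under that assumption, not the exact code):

    import math

    def cosine_lr(step, base_lr, total_steps, eta_min=0.0):
        # anneal from base_lr down to eta_min over total_steps
        return eta_min + (base_lr - eta_min) * (1 + math.cos(math.pi * step / total_steps)) / 2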
@@ -25,17 +25,17 @@ from mindspore.train.parallel_utils import ParallelMode
 class DatasetHelper:
     """
     Helper to use the MindData dataset.
     Depending on the context, it changes the dataset iterator so that the same for-loop works in all contexts.
     Note:
         Iterating over DatasetHelper yields one epoch of data.
     Args:
         dataset (DataSet): The dataset.
         dataset_sink_mode (bool): If True, use GetNext to fetch the data; otherwise feed the data from the host.
             Default: True.
     Examples:
         >>> dataset_helper = DatasetHelper(dataset)
         >>> for inputs in dataset_helper:
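A hedged completion of the truncated example above (the loop body is illustrative; `net` is an assumed network callable):

        >>>     outputs = net(*inputs)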
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""grad_reducer_thor"""
 import mindspore.common.dtype as mstype
 from mindspore.communication.management import GlobalComm, get_group_size
 from mindspore.nn.cell import Cell
@@ -13,8 +13,6 @@
 # limitations under the License.
 # ============================================================================
 """Model."""
-import mindspore.nn as nn
-import numpy as np
 from mindspore import context
 from mindspore import log as logger
 from mindspore._c_expression import init_exec_dataset
@@ -30,16 +28,17 @@ from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_
 from mindspore.train import amp
 from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
 from mindspore.train.parallel_utils import ParallelMode
+import mindspore.nn as nn
 from second_order.dataset_helper import DatasetHelper
+import numpy as np
 def _convert_type(types):
     """
     Convert from numpy type to tensor type.
     Args:
         types (list): Numpy type list of elements in dataset.
     Returns:
         list, list of element types in dataset.
     """
@@ -76,9 +75,9 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
 class Model:
     """
     High-Level API for Training or Testing.
     `Model` groups layers into an object with training and inference features.
     Args:
         network (Cell): The training or testing network.
         loss_fn (Cell): Objective function, if loss_fn is None, the
@@ -96,15 +95,15 @@ class Model:
             metric. Default: None.
         amp_level (str): Option for argument `level` in `mindspore.amp.build_train_network`, level for mixed
             precision training. Supports [O0, O2]. Default: "O0".
             - O0: Do not change.
             - O2: Cast network to float16, keep batchnorm run in float32, using dynamic loss scale.
         loss_scale_manager (Union[None, LossScaleManager]): If None, the loss is not scaled; otherwise the loss
             is scaled by LossScaleManager. If set, it overwrites the level setting. It is a keyword argument,
             e.g. `loss_scale_manager=None`.
         keep_batchnorm_fp32 (bool): Keep BatchNorm running in `float32`. If set, it overwrites the level setting.
             Default: True.
     Examples:
         >>> class Net(nn.Cell):
         >>>     def __init__(self):
@@ -250,7 +249,7 @@ class Model:
     def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
         """
         Training.
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -296,7 +295,7 @@ class Model:
     def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
         """
         Training process. The data would be passed to network through dataset channel.
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -366,7 +365,7 @@ class Model:
     def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
         """
         Training process. The data would be passed to network directly.
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -426,9 +425,9 @@ class Model:
     def train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
         """
         Training API where the iteration is controlled by python front-end.
         When pynative mode is set, the training process is performed with the dataset not sinking.
         Note:
             CPU is not supported when dataset_sink_mode is true.
             If dataset_sink_mode is True, epoch of training should be equal to the count of repeat
@@ -436,7 +435,7 @@ class Model:
             is not the amount training requires.
             If dataset_sink_mode is True, data will be sent to device. If device is Ascend, features
             of data will be transferred one by one. The limitation of data transmission per time is 256M.
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -448,8 +447,8 @@ class Model:
             dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True.
                 When pynative mode is set, the training process is performed with the dataset not sinking.
         Examples:
             >>> dataset = get_dataset()
             >>> net = Net()
@@ -477,12 +476,12 @@ class Model:
     def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None):
         """
         Evaluation. The data would be passed to network through dataset channel.
         Args:
             valid_dataset (Dataset): Dataset to evaluate the model.
             list_callback (ListCallback): Executor of callback list. Default: None.
             cb_params (_InternalCallbackParam): Callback parameters. Default: None.
         Returns:
             Dict, returns the loss value & metrics values for the model in test mode.
         """
@@ -526,7 +525,7 @@ class Model:
     def _eval_process(self, valid_dataset, list_callback=None, cb_params=None):
         """
         Evaluation. The data would be passed to network directly.
         Args:
             valid_dataset (Dataset): Dataset to evaluate the model.
             list_callback (ListCallback): Executor of callback list. Default: None.
| def eval(self, valid_dataset, callbacks=None, dataset_sink_mode=True): | |||
| """ | |||
| Evaluation API where the iteration is controlled by python front-end. | |||
| Configure to pynative mode, the evaluation will be performed with dataset non-sink mode. | |||
| Note: | |||
| CPU is not supported when dataset_sink_mode is true. | |||
| If dataset_sink_mode is True, data will be sent to device. If device is Ascend, features | |||
| of data will be transferred one by one. The limitation of data transmission per time is 256M. | |||
| Args: | |||
| valid_dataset (Dataset): Dataset to evaluate the model. | |||
| callbacks (list): List of callback object. Callbacks which should be excuted | |||
| while training. Default: None. | |||
| dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True. | |||
| Returns: | |||
| Dict, returns the loss value & metrics values for the model in test mode. | |||
| Examples: | |||
| >>> dataset = get_dataset() | |||
| >>> net = Net() | |||
@@ -603,18 +602,18 @@ class Model:
     def predict(self, *predict_data):
         """
         Generates output predictions for the input samples.
         Data can be a single tensor, a list of tensors, or a tuple of tensors.
         Note:
             Batch data should be put together in one tensor.
         Args:
             predict_data (Tensor): Tensor of predict data; can be an array, list, or tuple.
         Returns:
             Tensor, array(s) of predictions.
         Examples:
             >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32)
             >>> model = Model(Net())
@@ -126,8 +126,8 @@ def _bn_last(channel):
 def _fc(in_channel, out_channel, damping, loss_scale, frequency):
     weight_shape = (out_channel, in_channel)
     weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
-    return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight, bias_init=0,
-                      damping=damping, loss_scale=loss_scale, frequency=frequency)
+    return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight,
+                      bias_init=0, damping=damping, loss_scale=loss_scale, frequency=frequency)
 class ResidualBlock(nn.Cell):
@@ -14,8 +14,6 @@
 # ============================================================================
 """momentum"""
 import mindspore.common.dtype as mstype
-from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
-from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
 from mindspore.common.initializer import initializer
 from mindspore.common.parameter import Parameter
 from mindspore.common.parameter import ParameterTuple
@@ -24,6 +22,8 @@ from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
+from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
 from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
 from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
 from model.grad_reducer_thor import DistributedGradReducerThor
@@ -52,6 +52,7 @@ def _tensor_apply_decay(weight_decay, if_apply, weight, gradient):
 class THOR(Optimizer):
     """THOR"""
+
     def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0,
                  loss_scale=1.0,
                  decay_filter=lambda x: x.name not in []):
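For orientation, a hedged sketch of constructing this optimizer from the signature above. The four ParameterTuples are gathered from the network's Conv2d_Thor/Dense_Thor layers; `net` and the attribute names are assumptions, not part of the patch:

    net = resnet50()
    opt = THOR(filter(lambda x: x.requires_grad, net.get_parameters()),
               learning_rate=0.1, momentum=0.9,
               matrix_A=net.matrix_A, matrix_G=net.matrix_G,
               A_inv_max=net.A_inv_max, G_inv_max=net.G_inv_max,
               weight_decay=5e-4, loss_scale=128.0)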
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""thor_layer"""
 import mindspore as ms
 import mindspore.common.dtype as mstype
-import numpy as np
 from mindspore._checkparam import check_bool, twice, check_int_positive
 from mindspore._extends import cell_attr_register
 from mindspore.common.initializer import initializer
@@ -33,6 +32,7 @@ from cus_ops.cus_matmul_cube import CusMatMulCube
 from cus_ops.cus_matrix_combine import CusMatrixCombine
 from cus_ops.cus_transpose02314 import CusTranspose02314
+import numpy as np
 C0 = 16
@@ -91,8 +91,7 @@ class _Conv(Cell):
                              'attr \'group\' of \'Conv2D\' Op.')
         self.weight = Parameter(initializer(
-            weight_init, [out_channels, in_channels // group, *kernel_size]),
-            name='weight')
+            weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
         if check_bool(has_bias):
             self.bias = Parameter(initializer(
@@ -107,6 +106,7 @@
 class Conv2d_Thor(_Conv):
     """Conv2d_Thor"""
+
     def __init__(self,
                  in_channels,
                  out_channels,
@@ -180,7 +180,7 @@ class Conv2d_Thor(_Conv):
         self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
         self.fake_G = Tensor(
             np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
-        self.fake_G_inv_max = Tensor(np.zeros([1, ]).astype(np.float32))
+        self.fake_G_inv_max = Tensor(np.zeros([1,]).astype(np.float32))
         self.shape = P.Shape()
         self.reshape = P.Reshape()
@@ -196,8 +196,8 @@
         self.channels_slice_flag = True
         self.padA_flag = False
-        if (
-                self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
+        if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
+                and self.matrix_A_dim > self.diag_block_dim:
             self.padA_flag = True
             pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
             self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
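A quick numeric check of the padding rule above (my numbers): with diag_block_dim = 128 and matrix_A_dim = 300, the dimension is larger than one block but not a multiple of it, so pad_dim = 128 - 300 % 128 = 84 and the padded matrix is 384 x 384:

    diag_block_dim, matrix_A_dim = 128, 300
    if (matrix_A_dim // diag_block_dim) * diag_block_dim != matrix_A_dim \
            and matrix_A_dim > diag_block_dim:
        pad_dim = diag_block_dim - matrix_A_dim % diag_block_dim
    assert pad_dim == 84 and matrix_A_dim + pad_dim == 384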
@@ -228,6 +228,7 @@
         self.getG = P.InsertGradientOf(self.save_gradient)
     def save_gradient(self, dout):
+        """save_gradient"""
         out = dout
         dout = self.mul(dout, self.loss_scale)
         dout = self.mul(dout, 32.0)
@@ -252,7 +253,6 @@
         matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
         self.G_inv_max = matrix_G_inv_max
         matrix_G_inv = self.matrix_combine(matrix_G_inv)
-        matrix_G_inv_shape = self.shape(matrix_G_inv)
         matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
         matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
         matrix_G = self.cast(matrix_G_inv, mstype.float16)
@@ -287,7 +287,6 @@
         self.A_inv_max = matrix_A_inv_max
         matrix_A_inv = self.matrix_combine(matrix_A_inv)
         matrix_A_inv = self.cast(matrix_A_inv, mstype.float16)
-        in_channels = self.in_channels
         if self.padA_flag:
             matrix_A_inv = self.slice(matrix_A_inv, (0, 0), (self.matrix_A_dim, self.matrix_A_dim))
@@ -307,22 +306,23 @@
         return out
     def extra_repr(self):
+        """extra_repr"""
         s = 'input_channels={}, output_channels={}, kernel_size={},' \
             'stride={}, pad_mode={}, padding={}, dilation={}, ' \
             'group={}, data_format={}, has_bias={},' \
             'weight_init={}, bias_init={}'.format(
-            self.in_channels,
-            self.out_channels,
-            self.kernel_size,
-            self.stride,
-            self.pad_mode,
-            self.padding,
-            self.dilation,
-            self.group,
-            self.data_format,
-            self.has_bias,
-            self.weight,
-            self.bias)
+                self.in_channels,
+                self.out_channels,
+                self.kernel_size,
+                self.stride,
+                self.pad_mode,
+                self.padding,
+                self.dilation,
+                self.group,
+                self.data_format,
+                self.has_bias,
+                self.weight,
+                self.bias)
         if self.has_bias:
             s += ', bias={}'.format(self.bias)
@@ -330,6 +330,7 @@ class Conv2d_Thor(_Conv):
 class Dense_Thor(Cell):
     """Dense_Thor"""
+
     @cell_attr_register(attrs=['has_bias', 'activation'])
     def __init__(self,
                  in_channels,
@@ -405,6 +406,7 @@
         self.getG = P.InsertGradientOf(self.save_gradient)
     def save_gradient(self, dout):
+        """save_gradient"""
         out = dout
         dout = self.mul(dout, self.loss_scale)
         dout = self.mul(dout, 32.0)
@@ -435,6 +437,7 @@
         return out
     def construct(self, x):
+        """construct"""
         if self.thor:
             inputs = self.cube_matmul(x, x)
             normalizer = 32
@@ -472,6 +475,7 @@
         return output
     def extend_repr(self):
+        """extend_repr"""
         str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
             .format(self.in_channels, self.out_channels, self.weight, self.has_bias)
         if self.has_bias:
@@ -18,7 +18,6 @@ import os
 import random
 import mindspore.dataset.engine as de
-import numpy as np
 from mindspore import Tensor
 from mindspore import context
 from mindspore.communication.management import init
@@ -30,6 +29,7 @@ from second_order.model_second_order import Model
 from second_order.resnet import resnet50
 from second_order.thor import THOR
+import numpy as np
 from config_imagenet import config
 from crossentropy import CrossEntropy
 from dataset_imagenet import create_dataset
@@ -56,13 +56,14 @@ context.set_context(enable_mem_reuse=True)
 def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
     """get_second_order_lr"""
     lr_each_step = []
     total_steps = steps_per_epoch * total_epochs
     for i in range(total_steps):
         epoch = (i + 1) / steps_per_epoch
         base = (1.0 - float(epoch) / total_epochs) ** decay
-        lr = lr_init * base
-        lr_each_step.append(lr)
+        lr_local = lr_init * base
+        lr_each_step.append(lr_local)
     current_step = global_step
     lr_each_step = np.array(lr_each_step).astype(np.float32)
     print("learning_rate_is=====", lr_each_step)
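For example (my numbers): with lr_init=0.05, decay=6 and total_epochs=70, the polynomial factor (1 - epoch/70)**6 takes the rate from about 0.0459 at epoch 1, through about 0.00078 at epoch 35, to 0 at epoch 70:

    lr_init, decay, total_epochs = 0.05, 6, 70
    for epoch in (1, 35, 70):
        base = (1.0 - float(epoch) / total_epochs) ** decay
        print(epoch, lr_init * base)  # ~0.0459, ~0.00078, 0.0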
@@ -71,12 +72,13 @@ def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epo
 def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
     """get_second_order_damping"""
     damping_each_step = []
     total_steps = steps_per_epoch * total_epochs
     for step in range(total_steps):
         epoch = (step + 1) / steps_per_epoch
-        damping = damping_init * (decay_rate ** (epoch / 10))
-        damping_each_step.append(damping)
+        damping_here = damping_init * (decay_rate ** (epoch / 10))
+        damping_each_step.append(damping_here)
     current_step = global_step
     damping_each_step = np.array(damping_each_step).astype(np.float32)
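The damping decays by a factor of decay_rate every 10 epochs. With damping_init=0.03 and decay_rate=0.87 (my numbers), epoch 10 gives 0.03 * 0.87 = 0.0261 and epoch 20 gives 0.03 * 0.87**2, about 0.0227:

    damping_init, decay_rate = 0.03, 0.87
    for epoch in (10, 20):
        print(epoch, damping_init * decay_rate ** (epoch / 10))  # 0.0261, ~0.0227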