@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CrossEntropy"""
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
@@ -22,6 +22,7 @@ from mindspore.ops import operations as P
 class CrossEntropy(_Loss):
+    """CrossEntropy"""
     def __init__(self, smooth_factor=0., num_classes=1000):
         super(CrossEntropy, self).__init__()
         self.onehot = P.OneHot()
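For reference, the label-smoothing scheme this class configures through `P.OneHot()` can be stated in plain NumPy. A minimal sketch assuming the usual formulation (`on_value = 1 - smooth_factor`, `off_value = smooth_factor / (num_classes - 1)`); the helper name is illustrative, not part of the patch:

```python
import numpy as np

def smoothed_one_hot(label, num_classes=1000, smooth_factor=0.1):
    """Spread smooth_factor mass over the non-target classes."""
    on_value = 1.0 - smooth_factor
    off_value = smooth_factor / (num_classes - 1)
    target = np.full(num_classes, off_value, dtype=np.float32)
    target[label] = on_value
    return target
```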
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
+"""batch_matmul_impl"""
 from mindspore.ops.op_info_register import op_info_register
-"""batch_matmul_impl"""
 
 @op_info_register("""{
    "op_name": "CusBatchMatMul",
@@ -71,4 +71,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusBatchMatMul(input_x1, input_x2, output, transpose_a=False, transpose_b=True, kernel_name="batchmatmul"):
+    """CusBatchMatMul"""
     return
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ============================================================================
+"""CusCholeskyTrsm"""
 from mindspore.ops.op_info_register import op_info_register
-"""CusCholeskyTrsm"""
 
 @op_info_register("""{
    "op_name": "CusCholeskyTrsm",
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusCholeskyTrsm(input_x, output, kernel_name):
+    """CusCholeskyTrsm"""
     return
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusBatchMatMul"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
-# y = x^2
 class CusBatchMatMul(PrimitiveWithInfer):
     """CusMatMulCube definition"""
@@ -35,4 +34,5 @@ class CusBatchMatMul(PrimitiveWithInfer):
         return data1_shape
 
     def infer_dtype(self, data1_dtype, data2_dtype):
-        return data1_dtype
\ No newline at end of file
+        return data1_dtype
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusCholeskyTrsm"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
@@ -24,11 +25,14 @@ class CusCholeskyTrsm(PrimitiveWithInfer):
         self.init_prim_io_names(inputs=['x1'], outputs=['y'])
 
     def infer_shape(self, data1_shape):
-        m, n = data1_shape
+        ll = []
+        m, _ = data1_shape
         if m >= 128:
-            return [m // 128, 128, 128]
+            ll = [m // 128, 128, 128]
         else:
-            return [1, 64, 64]
+            ll = [1, 64, 64]
+        return ll
 
     def infer_dtype(self, data1_dtype):
-        return data1_dtype
\ No newline at end of file
+        return data1_dtype
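The refactor above only swaps the early returns for a single exit point; the blocking rule itself is unchanged. A plain-Python restatement for checking it (function name is illustrative):

```python
def cholesky_trsm_output_shape(m):
    """Sides >= 128 split into m // 128 diagonal blocks of 128x128;
    smaller inputs fall back to a single 64x64 block."""
    if m >= 128:
        return [m // 128, 128, 128]
    return [1, 64, 64]

assert cholesky_trsm_output_shape(256) == [2, 128, 128]
assert cholesky_trsm_output_shape(64) == [1, 64, 64]
```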
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusFusedAbsMax1"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -32,10 +33,13 @@ class CusFusedAbsMax1(PrimitiveWithInfer):
         return bprop
 
     def infer_shape(self, data1_shape):
+        ll = []
         if len(data1_shape) == 2:
-            return [1, ]
+            ll = [1,]
         else:
-            return [32, 64]
+            ll = [32, 64]
+        return ll
 
     def infer_dtype(self, data1_dtype):
         return data1_dtype
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusImg2Col"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -49,3 +49,4 @@ class CusImg2Col(PrimitiveWithInfer):
     def infer_dtype(self, data1_dtype):
         return data1_dtype
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCube"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -37,11 +38,11 @@ class CusMatMulCube(PrimitiveWithInfer):
     def infer_shape(self, data1_shape, data2_shape):
         # shape = [1, data1_shape[1], data2_shape[2], 16, 16]
         # return shape
-        if self.transpose_a == True:
+        if self.transpose_a:
             k1, m = data1_shape
         else:
             m, k1 = data1_shape
-        if self.transpose_b == True:
+        if self.transpose_b:
             n, k2 = data2_shape
         else:
             k2, n = data2_shape
@@ -51,3 +52,4 @@ class CusMatMulCube(PrimitiveWithInfer):
     def infer_dtype(self, data1_dtype, data2_dtype):
         return ms.common.dtype.tensor_type(getattr(ms, "float32"))
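The transpose handling that the `== True` cleanup touches is the standard matmul shape rule. A standalone sketch (the asserts are added sanity checks, not part of the operator):

```python
def matmul_output_shape(shape_a, shape_b, transpose_a=False, transpose_b=False):
    if transpose_a:
        k1, m = shape_a  # A is stored transposed: (k, m)
    else:
        m, k1 = shape_a
    if transpose_b:
        n, k2 = shape_b  # B is stored transposed: (n, k)
    else:
        k2, n = shape_b
    assert k1 == k2, "inner dimensions must agree"
    return [m, n]

assert matmul_output_shape([16, 32], [32, 8]) == [16, 8]
assert matmul_output_shape([32, 16], [8, 32], transpose_a=True, transpose_b=True) == [16, 8]
```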
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCubeDenseLeft"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatMulCubeFraczRightMul"""
 import mindspore as ms
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatrixCombine"""
 from mindspore.ops import prim_attr_register, PrimitiveWithInfer
 from mindspore.ops.composite import multitype_ops as C
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusFusedAbsMax1"""
 from mindspore.ops.op_info_register import op_info_register
@@ -65,4 +65,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusFusedAbsMax1(input_x, output, origin_shape=None, kernel_name="fused_abs_max1"):
+    """CusFusedAbsMax1"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusImg2ColNC1HWC0"""
 from mindspore.ops.op_info_register import op_info_register
@@ -83,4 +83,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusImg2ColNC1HWC0(input_x, output, ksizes, strides, dilates, padding, kernel_name="img2col"):
+    """CusImg2ColNC1HWC0"""
     return
@@ -2,11 +2,11 @@
 # -*- coding:utf-8 -*-
 """
 copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
@@ -14,7 +14,7 @@ distributed under the License == distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -98,4 +98,5 @@ NoneType = type(None)
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCubeDenseLeft(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
                            kernel_name="matmulcube"):
+    """CusMatMulCubeDenseLeft"""
     return
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License == distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -99,4 +99,5 @@ NoneType = type(None)
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCubeFraczLeftCast(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False,
                                kernel_name="CusMatMulCubeFraczLeftCast"):
+    """CusMatMulCubeFraczLeftCast"""
     return
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License == distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -2,19 +2,19 @@
 # -*- coding:utf-8 -*-
 """
 copyright 2020 Huawei Technologies Co., Ltd
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License == distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 matmul
 """
 from __future__ import absolute_import
@@ -110,4 +110,5 @@ NoneType = type(None)
 # pylint: disable=locally-disabled,too-many-arguments, too-many-locals, too-many-statements
 @util.check_input_type(dict, dict, (dict, NoneType), dict, bool, bool, str)
 def CusMatMulCube(input_x1, input_x2, bias=None, output_y={}, trans_a=False, trans_b=False, kernel_name="matmulcube"):
+    """CusMatMulCube"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusMatrixCombine"""
 from mindspore.ops.op_info_register import op_info_register
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusMatrixCombine(input_x, output, kernel_name="matrix_combine"):
+    """CusMatrixCombine"""
     return
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""CusTranspose02314"""
 from mindspore.ops.op_info_register import op_info_register
@@ -59,4 +59,5 @@ from mindspore.ops.op_info_register import op_info_register
     ]
 }""")
 def CusTranspose02314(input_x, output, kernel_name="transpose021354"):
+    """CusTranspose02314"""
     return
@@ -19,12 +19,14 @@ import numpy as np
 def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
+    """linear_warmup_lr"""
     lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
     lr = float(init_lr) + lr_inc * current_step
     return lr
 
 def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
+    """cosine_annealing_lr"""
     base_lr = lr
     warmup_init_lr = 0
     total_steps = int(max_epoch * steps_per_epoch)
@@ -44,6 +46,7 @@ def cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, et
 def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min=0, num_periods=0.5):
+    """warmup_cosine_annealing_lr"""
     base_lr = lr
     warmup_init_lr = 0
     total_steps = int(max_epoch * steps_per_epoch * 0.99)
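The warmup helper above is simple enough to check by hand. A worked example of `linear_warmup_lr` with illustrative values (warming up to 0.4 over 5 steps is an assumption, not the repo's config):

```python
def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
    return float(init_lr) + lr_inc * current_step

print([round(linear_warmup_lr(s, 5, 0.4, 0.0), 2) for s in range(1, 6)])
# [0.08, 0.16, 0.24, 0.32, 0.4]
```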
@@ -25,17 +25,17 @@ from mindspore.train.parallel_utils import ParallelMode
 class DatasetHelper:
     """
     Help function to use the Minddata dataset.
 
     According to different context, change the iter of dataset, to use the same for loop in different context.
 
     Note:
         The iter of DatasetHelper will give one epoch data.
 
     Args:
         dataset (DataSet): The dataset.
         dataset_sink_mode (bool): If true use GetNext to fetch the data, or else feed the data from host.
             Default: True.
 
     Examples:
         >>> dataset_helper = DatasetHelper(dataset)
         >>> for inputs in dataset_helper:
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""grad_reducer_thor"""
 import mindspore.common.dtype as mstype
 from mindspore.communication.management import GlobalComm, get_group_size
 from mindspore.nn.cell import Cell
@@ -13,8 +13,6 @@
 # limitations under the License.
 # ============================================================================
 """Model."""
-import mindspore.nn as nn
-import numpy as np
 from mindspore import context
 from mindspore import log as logger
 from mindspore._c_expression import init_exec_dataset
@@ -30,16 +28,17 @@ from mindspore.parallel._utils import _get_parallel_mode, _get_device_num, _get_
 from mindspore.train import amp
 from mindspore.train.callback import _InternalCallbackParam, RunContext, _build_callbacks
 from mindspore.train.parallel_utils import ParallelMode
+import mindspore.nn as nn
 from second_order.dataset_helper import DatasetHelper
+import numpy as np
 
 def _convert_type(types):
     """
     Convert from numpy type to tensor type.
 
     Args:
         types (list): Numpy type list of element in dataset.
 
     Returns:
         list, list of element in dataset.
     """
@@ -76,9 +75,9 @@ def _exec_datagraph(exec_dataset, dataset_size, phase='dataset'):
 class Model:
     """
     High-Level API for Training or Testing.
 
     `Model` groups layers into an object with training and inference features.
 
     Args:
         network (Cell): The training or testing network.
         loss_fn (Cell): Objective function, if loss_fn is None, the
@@ -96,15 +95,15 @@ class Model:
             metric. Default: None.
         amp_level (str): Option for argument `level` in `mindspore.amp.build_train_network`, level for mixed
             precision training. Supports [O0, O2]. Default: "O0".
 
             - O0: Do not change.
             - O2: Cast network to float16, keep batchnorm run in float32, using dynamic loss scale.
 
         loss_scale_manager (Union[None, LossScaleManager]): If None, not scale the loss, or else
             scale the loss by LossScaleManager. If it is set, overwrite the level setting. It's a keyword argument.
             e.g. Use `loss_scale_manager=None` to set the value.
         keep_batchnorm_fp32 (bool): Keep Batchnorm run in `float32`. If set, overwrite the level setting. Default: True.
 
     Examples:
         >>> class Net(nn.Cell):
         >>>     def __init__(self):
@@ -250,7 +249,7 @@ class Model:
     def _train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
         """
         Training.
 
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -296,7 +295,7 @@ class Model:
     def _train_dataset_sink_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
         """
         Training process. The data would be passed to network through dataset channel.
 
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -366,7 +365,7 @@ class Model:
     def _train_process(self, epoch, train_dataset, list_callback=None, cb_params=None):
         """
         Training process. The data would be passed to network directly.
 
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -426,9 +425,9 @@ class Model:
     def train(self, epoch, train_dataset, callbacks=None, dataset_sink_mode=True):
         """
         Training API where the iteration is controlled by python front-end.
 
         When setting pynative mode, the training process will be performed with dataset not sink.
 
         Note:
             CPU is not supported when dataset_sink_mode is true.
             If dataset_sink_mode is True, epoch of training should be equal to the count of repeat
@@ -436,7 +435,7 @@ class Model:
             is not the amount training requires.
             If dataset_sink_mode is True, data will be sent to device. If device is Ascend, features
             of data will be transferred one by one. The limitation of data transmission per time is 256M.
 
         Args:
             epoch (int): Total number of iterations on the data.
             train_dataset (Dataset): A training dataset iterator. If there is no
@@ -448,8 +447,8 @@ class Model:
             dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True.
                 Configure pynative mode, the training process will be performed with
                 dataset not sink.
 
         Examples:
             >>> dataset = get_dataset()
             >>> net = Net()
@@ -477,12 +476,12 @@ class Model:
     def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None):
         """
         Evaluation. The data would be passed to network through dataset channel.
 
         Args:
             valid_dataset (Dataset): Dataset to evaluate the model.
             list_callback (ListCallback): Executor of callback list. Default: None.
             cb_params (_InternalCallbackParam): Callback parameters. Default: None.
 
         Returns:
             Dict, returns the loss value & metrics values for the model in test mode.
         """
@@ -526,7 +525,7 @@ class Model:
     def _eval_process(self, valid_dataset, list_callback=None, cb_params=None):
         """
         Evaluation. The data would be passed to network directly.
 
         Args:
             valid_dataset (Dataset): Dataset to evaluate the model.
             list_callback (ListCallback): Executor of callback list. Default: None.
@@ -555,23 +554,23 @@ class Model:
     def eval(self, valid_dataset, callbacks=None, dataset_sink_mode=True):
         """
         Evaluation API where the iteration is controlled by python front-end.
 
         Configure to pynative mode, the evaluation will be performed with dataset non-sink mode.
 
         Note:
             CPU is not supported when dataset_sink_mode is true.
             If dataset_sink_mode is True, data will be sent to device. If device is Ascend, features
             of data will be transferred one by one. The limitation of data transmission per time is 256M.
 
         Args:
             valid_dataset (Dataset): Dataset to evaluate the model.
             callbacks (list): List of callback object. Callbacks which should be executed
                 while training. Default: None.
             dataset_sink_mode (bool): Determines whether to pass the data through dataset channel. Default: True.
 
         Returns:
             Dict, returns the loss value & metrics values for the model in test mode.
 
         Examples:
             >>> dataset = get_dataset()
             >>> net = Net()
@@ -603,18 +602,18 @@ class Model:
     def predict(self, *predict_data):
         """
         Generates output predictions for the input samples.
 
         Data could be single tensor, or list of tensor, tuple of tensor.
 
         Note:
             Batch data should be put together in one tensor.
 
         Args:
             predict_data (Tensor): Tensor of predict data. can be array, list or tuple.
 
         Returns:
             Tensor, array(s) of predictions.
 
         Examples:
             >>> input_data = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32)
             >>> model = Model(Net())
@@ -126,8 +126,8 @@ def _bn_last(channel):
 def _fc(in_channel, out_channel, damping, loss_scale, frequency):
     weight_shape = (out_channel, in_channel)
     weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5)))
-    return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight, bias_init=0,
-                      damping=damping, loss_scale=loss_scale, frequency=frequency)
+    return Dense_Thor(in_channel, out_channel, has_bias=False, weight_init=weight,
+                      bias_init=0, damping=damping, loss_scale=loss_scale, frequency=frequency)
 
 class ResidualBlock(nn.Cell):
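`_fc` draws its weights from `kaiming_uniform` with `a = sqrt(5)`; the repo's own initializer is not shown in this diff. A hedged NumPy sketch of the common fan-in formulation it presumably follows:

```python
import math
import numpy as np

def kaiming_uniform(shape, a=math.sqrt(5)):
    """He-uniform init: bound = gain * sqrt(3 / fan_in), leaky-relu gain."""
    fan_in = shape[1]                       # shape is (out_channel, in_channel)
    gain = math.sqrt(2.0 / (1.0 + a ** 2))
    bound = gain * math.sqrt(3.0 / fan_in)
    return np.random.uniform(-bound, bound, shape).astype(np.float32)
```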
@@ -14,8 +14,6 @@
 # ============================================================================
 """momentum"""
 import mindspore.common.dtype as mstype
-from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
-from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
 from mindspore.common.initializer import initializer
 from mindspore.common.parameter import Parameter
 from mindspore.common.parameter import ParameterTuple
@@ -24,6 +22,8 @@ from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops import functional as F, composite as C, operations as P
 from mindspore.parallel._utils import _get_device_num, _get_mirror_mean
+from cus_ops.cus_matmul_cube_dense_right import CusMatMulCubeDenseRight
+from cus_ops.cus_matmul_cube_fracz_left_cast import CusMatMulCubeFraczLeftCast
 from cus_ops.cus_matmul_cube_dense_left import CusMatMulCubeDenseLeft
 from cus_ops.cus_matmul_cube_fracz_right_mul import CusMatMulCubeFraczRightMul
 from model.grad_reducer_thor import DistributedGradReducerThor
@@ -52,6 +52,7 @@ def _tensor_apply_decay(weight_decay, if_apply, weight, gradient):
 class THOR(Optimizer):
+    """THOR"""
     def __init__(self, params, learning_rate, momentum, matrix_A, matrix_G, A_inv_max, G_inv_max, weight_decay=0.0,
                  loss_scale=1.0,
                  decay_filter=lambda x: x.name not in []):
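For context on what the `matrix_A`/`matrix_G` arguments feed into: THOR is a KFAC-style second-order optimizer, and its per-layer update preconditions the gradient with the inverted Kronecker factors. A schematic NumPy sketch (the function name and the plain `@` products are illustrative; the real optimizer uses the fused cube ops imported above):

```python
import numpy as np

def thor_precondition(grad, matrix_a_inv, matrix_g_inv):
    # grad: (out, in); matrix_a_inv: (in, in) input-covariance inverse;
    # matrix_g_inv: (out, out) gradient-covariance inverse.
    return matrix_g_inv @ grad @ matrix_a_inv
```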
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""thor_layer"""
 import mindspore as ms
 import mindspore.common.dtype as mstype
-import numpy as np
 from mindspore._checkparam import check_bool, twice, check_int_positive
 from mindspore._extends import cell_attr_register
 from mindspore.common.initializer import initializer
@@ -33,6 +32,7 @@ from cus_ops.cus_matmul_cube import CusMatMulCube
 from cus_ops.cus_matrix_combine import CusMatrixCombine
 from cus_ops.cus_transpose02314 import CusTranspose02314
+import numpy as np
 
 C0 = 16
@@ -91,8 +91,7 @@ class _Conv(Cell):
                              'attr \'group\' of \'Conv2D\' Op.')
         self.weight = Parameter(initializer(
-            weight_init, [out_channels, in_channels // group, *kernel_size]),
-            name='weight')
+            weight_init, [out_channels, in_channels // group, *kernel_size]), name='weight')
 
         if check_bool(has_bias):
             self.bias = Parameter(_initializer(
@@ -107,6 +106,7 @@ class _Conv(Cell):
 class Conv2d_Thor(_Conv):
+    """Conv2d_Thor"""
     def __init__(self,
                  in_channels,
                  out_channels,
@@ -180,7 +180,7 @@ class Conv2d_Thor(_Conv):
         self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
         self.fake_G = Tensor(
             np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))
-        self.fake_G_inv_max = Tensor(np.zeros([1, ]).astype(np.float32))
+        self.fake_G_inv_max = Tensor(np.zeros([1,]).astype(np.float32))
 
         self.shape = P.Shape()
         self.reshape = P.Reshape()
@@ -196,8 +196,8 @@ class Conv2d_Thor(_Conv):
         self.channels_slice_flag = True
 
         self.padA_flag = False
-        if (
-                self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim and self.matrix_A_dim > self.diag_block_dim:
+        if ((self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim
+                and self.matrix_A_dim > self.diag_block_dim):
             self.padA_flag = True
             pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
             self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
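The reformatted condition above decides whether `matrix_A` needs zero-padding up to a multiple of `diag_block_dim`. A quick check with illustrative numbers:

```python
matrix_A_dim, diag_block_dim = 300, 128
if ((matrix_A_dim // diag_block_dim) * diag_block_dim != matrix_A_dim
        and matrix_A_dim > diag_block_dim):
    pad_dim = diag_block_dim - matrix_A_dim % diag_block_dim
    print(pad_dim)  # 84, so the padded matrix is 384x384: three 128-blocks
```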
@@ -228,6 +228,7 @@ class Conv2d_Thor(_Conv):
         self.getG = P.InsertGradientOf(self.save_gradient)
 
     def save_gradient(self, dout):
+        """save_gradient"""
         out = dout
         dout = self.mul(dout, self.loss_scale)
         dout = self.mul(dout, 32.0)
@@ -252,7 +253,6 @@ class Conv2d_Thor(_Conv):
             matrix_G_inv_max = self.fused_abs_max2(matrix_G_inv_max)
             self.G_inv_max = matrix_G_inv_max
             matrix_G_inv = self.matrix_combine(matrix_G_inv)
-            matrix_G_inv_shape = self.shape(matrix_G_inv)
             matrix_G_inv = self.reshape(matrix_G_inv, self.matrix_G_device_temp_shape)
             matrix_G_inv = self.transpose(matrix_G_inv, (2, 0, 1, 3))
             matrix_G = self.cast(matrix_G_inv, mstype.float16)
@@ -287,7 +287,6 @@ class Conv2d_Thor(_Conv):
             self.A_inv_max = matrix_A_inv_max
             matrix_A_inv = self.matrix_combine(matrix_A_inv)
             matrix_A_inv = self.cast(matrix_A_inv, mstype.float16)
-            in_channels = self.in_channels
             if self.padA_flag:
                 matrix_A_inv = self.slice(matrix_A_inv, (0, 0), (self.matrix_A_dim, self.matrix_A_dim))
@@ -307,22 +306,23 @@ class Conv2d_Thor(_Conv):
         return out
 
     def extra_repr(self):
+        """extra_repr"""
         s = 'input_channels={}, output_channels={}, kernel_size={},' \
             'stride={}, pad_mode={}, padding={}, dilation={}, ' \
             'group={}, data_format={}, has_bias={},' \
             'weight_init={}, bias_init={}'.format(
-                self.in_channels,
-                self.out_channels,
-                self.kernel_size,
-                self.stride,
-                self.pad_mode,
-                self.padding,
-                self.dilation,
-                self.group,
-                self.data_format,
-                self.has_bias,
-                self.weight,
-                self.bias)
+            self.in_channels,
+            self.out_channels,
+            self.kernel_size,
+            self.stride,
+            self.pad_mode,
+            self.padding,
+            self.dilation,
+            self.group,
+            self.data_format,
+            self.has_bias,
+            self.weight,
+            self.bias)
         if self.has_bias:
             s += ', bias={}'.format(self.bias)
@@ -330,6 +330,7 @@ class Conv2d_Thor(_Conv):
 class Dense_Thor(Cell):
+    """Dense_Thor"""
     @cell_attr_register(attrs=['has_bias', 'activation'])
     def __init__(self,
                  in_channels,
@@ -405,6 +406,7 @@ class Dense_Thor(Cell):
         self.getG = P.InsertGradientOf(self.save_gradient)
 
     def save_gradient(self, dout):
+        """save_gradient"""
         out = dout
         dout = self.mul(dout, self.loss_scale)
         dout = self.mul(dout, 32.0)
@@ -435,6 +437,7 @@ class Dense_Thor(Cell):
         return out
 
     def construct(self, x):
+        """construct"""
         if self.thor:
             inputs = self.cube_matmul(x, x)
             normalizer = 32
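`construct` starts the statistics pass by squaring the input: assuming `cube_matmul(x, x)` accumulates x-transpose times x (the usual THOR statistics step; the transpose flag is not visible in this diff), dividing by the batch-size normalizer gives the input second-moment matrix A. A NumPy equivalent under that assumption:

```python
import numpy as np

batch, in_channels = 32, 2048          # illustrative sizes
x = np.random.randn(batch, in_channels).astype(np.float32)
matrix_a = x.T @ x / batch             # assumed equivalent of cube_matmul(x, x) / normalizer
```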
@@ -472,6 +475,7 @@ class Dense_Thor(Cell):
         return output
 
     def extend_repr(self):
+        """extend_repr"""
         str_info = 'in_channels={}, out_channels={}, weight={}, has_bias={}' \
             .format(self.in_channels, self.out_channels, self.weight, self.has_bias)
         if self.has_bias:
@@ -18,7 +18,6 @@ import os
 import random
 
 import mindspore.dataset.engine as de
-import numpy as np
 from mindspore import Tensor
 from mindspore import context
 from mindspore.communication.management import init
@@ -30,6 +29,7 @@ from second_order.model_second_order import Model
 from second_order.resnet import resnet50
 from second_order.thor import THOR
+import numpy as np
 
 from config_imagenet import config
 from crossentropy import CrossEntropy
 from dataset_imagenet import create_dataset
@@ -56,13 +56,14 @@ context.set_context(enable_mem_reuse=True)
 def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch):
+    """get_second_order_lr"""
     lr_each_step = []
     total_steps = steps_per_epoch * total_epochs
     for i in range(total_steps):
         epoch = (i + 1) / steps_per_epoch
         base = (1.0 - float(epoch) / total_epochs) ** decay
-        lr = lr_init * base
-        lr_each_step.append(lr)
+        lr_local = lr_init * base
+        lr_each_step.append(lr_local)
     current_step = global_step
     lr_each_step = np.array(lr_each_step).astype(np.float32)
     print("learning_rate_is=====", lr_each_step)
@@ -71,12 +72,13 @@ def get_second_order_lr(global_step, lr_init, decay, total_epochs, steps_per_epo
 def get_second_order_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
+    """get_second_order_damping"""
     damping_each_step = []
     total_steps = steps_per_epoch * total_epochs
     for step in range(total_steps):
         epoch = (step + 1) / steps_per_epoch
-        damping = damping_init * (decay_rate ** (epoch / 10))
-        damping_each_step.append(damping)
+        damping_here = damping_init * (decay_rate ** (epoch / 10))
+        damping_each_step.append(damping_here)
     current_step = global_step
     damping_each_step = np.array(damping_each_step).astype(np.float32)
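`get_second_order_damping` follows the same shape: the damping decays by a factor of `decay_rate` once per ten epochs, since the exponent is `epoch / 10`. A quick check with illustrative values (0.03 and 0.87 are assumptions, not the repo's config):

```python
damping_init, decay_rate = 0.03, 0.87
for epoch in (1, 10, 20):
    print(epoch, round(damping_init * decay_rate ** (epoch / 10), 5))
# 1 0.02959, 10 0.0261, 20 0.02271
```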