From bcee320dbb88bd42bd5c2edb5df8ada105ed4f7c Mon Sep 17 00:00:00 2001 From: yoni Date: Tue, 13 Oct 2020 09:04:57 +0300 Subject: [PATCH] tod add networks and ops --- mindspore/lite/CMakeLists.txt | 1 + mindspore/lite/include/train_session.h | 31 +- mindspore/lite/nnacl/batchnorm_parameter.h | 1 + mindspore/lite/nnacl/fp32/batchnorm.c | 24 +- mindspore/lite/nnacl/fp32/batchnorm.h | 4 +- mindspore/lite/nnacl/fp32_grad/optimizer.h | 36 + mindspore/lite/nnacl/fp32_grad/pack_ext.c | 98 ++- mindspore/lite/nnacl/fp32_grad/pack_ext.h | 2 +- mindspore/lite/nnacl/fp32_grad/pooling_grad.c | 6 +- mindspore/lite/nnacl/fp32_grad/pooling_grad.h | 4 +- mindspore/lite/schema/model.fbs | 2 + mindspore/lite/schema/ops.fbs | 26 + mindspore/lite/src/common/file_utils_ext.cc | 2 +- mindspore/lite/src/lite_kernel.cc | 16 + mindspore/lite/src/lite_kernel.h | 8 + mindspore/lite/src/ops/apply_momentum.cc | 17 +- mindspore/lite/src/ops/apply_momentum.h | 3 + mindspore/lite/src/ops/bias_grad.cc | 1 + mindspore/lite/src/ops/bn_grad.cc | 3 +- mindspore/lite/src/ops/bn_grad.h | 6 +- mindspore/lite/src/ops/fused_batchnorm.cc | 15 + mindspore/lite/src/ops/fused_batchnorm.h | 1 + mindspore/lite/src/ops/pooling_grad.cc | 10 +- mindspore/lite/src/ops/primitive_c.cc | 13 +- mindspore/lite/src/ops/sgd.cc | 97 +++ mindspore/lite/src/ops/sgd.h | 49 ++ mindspore/lite/src/populate_parameter.cc | 1 + .../kernel/arm/fp32/fused_batchnorm.cc | 64 +- .../runtime/kernel/arm/fp32/fused_batchnorm.h | 4 + .../kernel/arm/fp32_grad/activation_grad.cc | 40 +- .../kernel/arm/fp32_grad/apply_momentum.cc | 48 +- .../kernel/arm/fp32_grad/apply_momentum.h | 9 +- .../kernel/arm/fp32_grad/arithmetic_grad.cc | 31 +- .../kernel/arm/fp32_grad/arithmetic_grad.h | 1 + .../runtime/kernel/arm/fp32_grad/bias_grad.cc | 34 +- .../runtime/kernel/arm/fp32_grad/bias_grad.h | 1 + .../runtime/kernel/arm/fp32_grad/bn_grad.cc | 46 +- .../runtime/kernel/arm/fp32_grad/bn_grad.h | 12 +- .../kernel/arm/fp32_grad/convolution.cc | 48 +- .../kernel/arm/fp32_grad/convolution.h | 10 +- .../arm/fp32_grad/convolution_grad_filter.cc | 53 +- .../arm/fp32_grad/convolution_grad_filter.h | 11 +- .../arm/fp32_grad/convolution_grad_input.cc | 48 +- .../arm/fp32_grad/convolution_grad_input.h | 10 +- .../fp32_grad/deconvolution_grad_filter.cc | 155 +++++ .../arm/fp32_grad/deconvolution_grad_filter.h | 40 ++ .../runtime/kernel/arm/fp32_grad/make_tuple.h | 1 + .../kernel/arm/fp32_grad/pooling_grad.cc | 45 +- .../kernel/arm/fp32_grad/pooling_grad.h | 3 + .../kernel/arm/fp32_grad/power_grad.cc | 39 +- .../runtime/kernel/arm/fp32_grad/power_grad.h | 1 + .../src/runtime/kernel/arm/fp32_grad/sgd.cc | 121 ++++ .../src/runtime/kernel/arm/fp32_grad/sgd.h | 44 ++ .../softmax_cross_entropy_with_logits.cc | 49 +- .../softmax_cross_entropy_with_logits.h | 12 +- .../kernel/arm/fp32_grad/softmax_grad.cc | 49 +- .../kernel/arm/fp32_grad/softmax_grad.h | 11 +- ...parse_softmax_cross_entropy_with_logits.cc | 53 +- ...sparse_softmax_cross_entropy_with_logits.h | 10 +- .../kernel/arm/fp32_grad/tuple_getitem.cc | 46 +- .../kernel/arm/fp32_grad/tuple_getitem.h | 1 + .../src/train/train_populate_parameter.cc | 70 +- mindspore/lite/src/train/train_session.cc | 116 +++- mindspore/lite/src/train/train_session.h | 94 +++ .../fp32_grad/arithmetic_grad_fp32_tests.cc | 78 ++- .../arm/fp32_grad/bias_grad_fp32_tests.cc | 7 +- .../kernel/arm/fp32_grad/bn_grad_fp32_test.cc | 56 +- .../fp32_grad/convolution_grad_fp32_tests.cc | 263 +++++++- .../deconvolution_grad_fp32_tests.cc | 634 ++++++++++++++++++ 
.../kernel/arm/fp32_grad/network_test.cc | 152 ++++- .../arm/fp32_grad/pooling_grad_fp32_tests.cc | 83 ++- .../softmax_crossentropy_fp32_tests.cc | 9 +- .../arm/fp32_grad/softmax_grad_fp32_tests.cc | 345 +--------- .../conv/convfp32_dw_d2_g2_s2_12_2_3_3.bin | Bin 0 -> 864 bytes .../conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin | Bin 0 -> 21600 bytes .../convfp32_input0_d2_g2_s2_2_4_32_32.bin | Bin 0 -> 32768 bytes .../convfp32_inputdx_d2_g2_s2_2_4_32_32.bin | Bin 0 -> 32768 bytes .../conv/convfp32_w_d2_g2_s2_12_2_3_3.bin | Bin 0 -> 864 bytes .../deconv/deconvfp32_dw_9_3_3_3.bin | Bin 0 -> 972 bytes .../deconv/deconvfp32_dw_d2_9_3_3_3.bin | Bin 0 -> 972 bytes .../deconvfp32_dw_d2_g12_s2_12_1_3_3.bin | Bin 0 -> 432 bytes .../deconv/deconvfp32_dw_d2_g2_s2_6_4_3_3.bin | Bin 0 -> 864 bytes .../deconv/deconvfp32_dw_d2_g3_3_3_3_3.bin | 2 + .../deconv/deconvfp32_dw_d2_g3_s1_3_3_3_3.bin | Bin 0 -> 324 bytes .../deconv/deconvfp32_dy_2_9_63_63.bin | Bin 0 -> 285768 bytes .../deconv/deconvfp32_dy_d2_2_9_65_65.bin | Bin 0 -> 304200 bytes .../deconvfp32_dy_d2_g12_s2_2_12_65_65.bin | Bin 0 -> 405600 bytes .../deconvfp32_dy_d2_g2_s2_2_12_65_65.bin | Bin 0 -> 405600 bytes .../deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin | Bin 0 -> 304200 bytes .../deconvfp32_dy_d2_g3_s1_2_9_34_34.bin | Bin 0 -> 83232 bytes .../deconv/deconvfp32_input0_2_3_32_32.bin | Bin 0 -> 24576 bytes .../deconv/deconvfp32_input0_d2_2_3_32_32.bin | Bin 0 -> 24576 bytes ...deconvfp32_input0_d2_g12_s2_2_12_32_32.bin | Bin 0 -> 98304 bytes .../deconvfp32_input0_d2_g2_s2_2_4_32_32.bin | Bin 0 -> 32768 bytes .../deconvfp32_input0_d2_g3_2_3_32_32.bin | Bin 0 -> 24576 bytes .../deconvfp32_input0_d2_g3_s1_2_3_32_32.bin | Bin 0 -> 24576 bytes .../test_data/deconv/deconvfp32_w_9_3_3_3.bin | Bin 0 -> 972 bytes .../arm/test_data/nets/effnetb0_fwd_fuse.ms | Bin 0 -> 21348576 bytes .../arm/test_data/nets/effnetb0_fwd_nofuse.ms | Bin 21612416 -> 21613912 bytes .../arm/test_data/nets/facenet_input.f32 | Bin 0 -> 150528 bytes .../arm/test_data/nets/facenet_output.f32 | Bin 0 -> 512 bytes .../kernel/arm/test_data/nets/lenet_train.ms | Bin 504680 -> 504056 bytes .../arm/test_data/nets/mobilefacenet0924.ms | Bin 0 -> 4052008 bytes .../arm/test_data/nets/retinaface0924.ms | Bin 0 -> 1770448 bytes .../arm/test_data/nets/retinaface_input.f32 | Bin 0 -> 2918400 bytes .../arm/test_data/nets/retinaface_out_0.f32 | Bin 0 -> 161280 bytes .../arm/test_data/nets/retinaface_out_1.f32 | Bin 0 -> 80640 bytes .../arm/test_data/nets/retinaface_out_2.f32 | Bin 0 -> 403200 bytes .../kernel/arm/test_data/nets/y_lenet.bin | Bin 1280 -> 1280 bytes .../lite/tools/anf_exporter/anf_exporter.cc | 29 +- mindspore/lite/tools/common/node_util.cc | 4 +- .../tools/converter/graphdef_transform.cc | 4 +- .../graph/format_trans_pass.cc | 3 +- 113 files changed, 2768 insertions(+), 808 deletions(-) create mode 100644 mindspore/lite/nnacl/fp32_grad/optimizer.h create mode 100644 mindspore/lite/src/ops/sgd.cc create mode 100644 mindspore/lite/src/ops/sgd.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.cc create mode 100644 mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.h create mode 100644 mindspore/lite/src/train/train_session.h create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc create mode 100644 
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_d2_g2_s2_12_2_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_input0_d2_g2_s2_2_4_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_inputdx_d2_g2_s2_2_4_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_w_d2_g2_s2_12_2_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_9_3_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_9_3_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_g12_s2_12_1_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_g2_s2_6_4_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_g3_3_3_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_g3_s1_3_3_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_2_9_63_63.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_d2_2_9_65_65.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_d2_g12_s2_2_12_65_65.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_d2_g2_s2_2_12_65_65.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dy_d2_g3_s1_2_9_34_34.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_2_3_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_d2_2_3_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_d2_g12_s2_2_12_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_d2_g2_s2_2_4_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_d2_g3_2_3_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_input0_d2_g3_s1_2_3_32_32.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_w_9_3_3_3.bin create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/effnetb0_fwd_fuse.ms create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/facenet_input.f32 create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/facenet_output.f32 create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/mobilefacenet0924.ms create mode 100644 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/retinaface0924.ms create mode 100755 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/retinaface_input.f32 create mode 100755 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/retinaface_out_0.f32 create mode 100755 
mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/retinaface_out_1.f32 create mode 100755 mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/nets/retinaface_out_2.f32 diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 8f4c553e01..515c5d9a6b 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -235,3 +235,4 @@ if (NOT WIN32) endif () include(${TOP_DIR}/cmake/package_lite.cmake) + diff --git a/mindspore/lite/include/train_session.h b/mindspore/lite/include/train_session.h index d1634cb083..c3cab39d61 100644 --- a/mindspore/lite/include/train_session.h +++ b/mindspore/lite/include/train_session.h @@ -17,37 +17,28 @@ #define MINDSPORE_LITE_INCLUDE_TRAIN_SESSION_H_ #include #include +#include #include -#include "src/lite_session.h" +#include "include/lite_session.h" +#include "include/train_model.h" namespace mindspore { -namespace lite { -struct TrainModel; -} - namespace session { -class TrainSession : public lite::LiteSession { - public: - TrainSession(); - ~TrainSession(); - int RunGraph(const session::KernelCallBack &before = nullptr, - const session::KernelCallBack &after = nullptr) override; - - int CompileGraph(lite::Model *model) override; - virtual void* ExportToBuf(char* buf, size_t* len) const; +class TrainSession : public session::LiteSession { + public: + virtual ~TrainSession() = default; + static TrainSession *CreateSession(lite::Context *context); - virtual void Train(); + virtual int CompileTrainGraph(lite::TrainModel *model) = 0; + virtual void *ExportToBuf(char *buf, size_t *len) const = 0; + virtual void Train() = 0; bool IsTrain() { return train_mode_ == true; } - virtual void Eval(); + virtual void Eval() = 0; bool IsEval() { return train_mode_ == false; } protected: - virtual void ReplaceOps(); bool train_mode_ = false; - lite::TrainModel *model_ = nullptr; - std::unordered_map> orig_output_map_; - std::unordered_map orig_output_tensor_map_; }; } // namespace session } // namespace mindspore diff --git a/mindspore/lite/nnacl/batchnorm_parameter.h b/mindspore/lite/nnacl/batchnorm_parameter.h index 8708ed2cb2..8e460cf438 100644 --- a/mindspore/lite/nnacl/batchnorm_parameter.h +++ b/mindspore/lite/nnacl/batchnorm_parameter.h @@ -22,6 +22,7 @@ typedef struct BatchNormParameter { OpParameter op_parameter_; float epsilon_; + float momentum_; int unit_; int units_; int channel_; diff --git a/mindspore/lite/nnacl/fp32/batchnorm.c b/mindspore/lite/nnacl/fp32/batchnorm.c index 755e12f4d0..1c01c9be02 100644 --- a/mindspore/lite/nnacl/fp32/batchnorm.c +++ b/mindspore/lite/nnacl/fp32/batchnorm.c @@ -54,22 +54,22 @@ void FusedBatchNormFp32(const void *input, const void *scale, const void *offset } } -void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var, - BatchNormParameter *param, float *save_mean, float *save_inv_var) { +void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, + float *save_mean, float *save_var) { float N = (float)param->unit_; for (int i = 0; i < param->unit_; i++) { - for (int f = 0; f < param->channel_; f++) { - int idx = i * param->channel_ + f; - run_mean[f] += input[idx]; - run_var[f] += input[idx] * input[idx]; + for (int c = 0; c < param->channel_; c++) { + int idx = i * param->channel_ + c; + run_mean[c] += input[idx]; + run_var[c] += input[idx] * input[idx]; } } const float VN = (N > 1.0f) ? 
(N - 1.0f) : 1.0f;
-  for (int f = 0; f < param->channel_; f++) {
-    run_mean[f] = run_mean[f] / N;
-    run_var[f] = run_var[f] / VN - run_mean[f] * run_mean[f];
-    save_mean[f] = momentum * save_mean[f] + (1 - momentum) * run_mean[f];
-    const float inv_var = 1.f / sqrt(run_var[f] + param->epsilon_);
-    save_inv_var[f] = momentum * save_inv_var[f] + (1 - momentum) * inv_var;
+  for (int c = 0; c < param->channel_; c++) {
+    run_mean[c] = run_mean[c] / N;
+    run_var[c] = run_var[c] / VN - run_mean[c] * run_mean[c];
+    save_mean[c] = param->momentum_ * save_mean[c] + (1 - param->momentum_) * run_mean[c];
+    const float var = run_var[c];
+    save_var[c] = param->momentum_ * save_var[c] + (1 - param->momentum_) * var;
   }
 }
diff --git a/mindspore/lite/nnacl/fp32/batchnorm.h b/mindspore/lite/nnacl/fp32/batchnorm.h
index fa071425a3..6dfb05660b 100644
--- a/mindspore/lite/nnacl/fp32/batchnorm.h
+++ b/mindspore/lite/nnacl/fp32/batchnorm.h
@@ -28,8 +28,8 @@ void BatchNormFp32(const void *input, const void *mean, const void *variance, Ba
 void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean,
                         const void *variance, BatchNormParameter *param, int task_id, void *output);
-void FusedBatchNormFp32MeanVar(const float *input, float momentum, float *run_mean, float *run_var,
-                               BatchNormParameter *param, float *save_mean, float *save_var);
+void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param,
+                               float *save_mean, float *save_var);
 #ifdef __cplusplus
 }
 #endif
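For reference, the running-statistics update that the reworked FusedBatchNormFp32MeanVar performs reduces to the following standalone C sketch. It is illustrative only: the array sizes and the 0.9 momentum below are made up, and the kernel itself reads momentum from BatchNormParameter rather than taking it as an argument.

#include <stdio.h>

/* Standalone sketch of the batch-stat update in FusedBatchNormFp32MeanVar:
 * batch mean/var are accumulated over the unit_ (N*H*W) axis per channel,
 * the variance uses the Bessel-style divisor VN = max(N - 1, 1) as in the
 * kernel, and the saved running stats are blended as
 * save = m * save + (1 - m) * batch_stat. */
static void mean_var_update(const float *input, int unit, int channel, float momentum,
                            float *run_mean, float *run_var, float *save_mean, float *save_var) {
  const float N = (float)unit;
  const float VN = (N > 1.0f) ? (N - 1.0f) : 1.0f;
  for (int i = 0; i < unit; i++) {
    for (int c = 0; c < channel; c++) {
      run_mean[c] += input[i * channel + c];
      run_var[c] += input[i * channel + c] * input[i * channel + c];
    }
  }
  for (int c = 0; c < channel; c++) {
    run_mean[c] /= N;
    run_var[c] = run_var[c] / VN - run_mean[c] * run_mean[c];
    save_mean[c] = momentum * save_mean[c] + (1 - momentum) * run_mean[c];
    save_var[c] = momentum * save_var[c] + (1 - momentum) * run_var[c];
  }
}

int main(void) {
  /* one channel, four samples: batch mean 2.5, variance per the kernel's formula */
  float in[4] = {1.f, 2.f, 3.f, 4.f};
  float run_mean[1] = {0.f}, run_var[1] = {0.f};
  float save_mean[1] = {0.f}, save_var[1] = {0.f};
  mean_var_update(in, 4, 1, 0.9f, run_mean, run_var, save_mean, save_var);
  printf("batch mean %.3f var %.3f, running mean %.3f var %.3f\n",
         run_mean[0], run_var[0], save_mean[0], save_var[0]);
  return 0;
}

diff --git a/mindspore/lite/nnacl/fp32_grad/optimizer.h b/mindspore/lite/nnacl/fp32_grad/optimizer.h
new file mode 100644
index 0000000000..9d03977a8c
--- /dev/null
+++ b/mindspore/lite/nnacl/fp32_grad/optimizer.h
@@ -0,0 +1,36 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.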
+ */ + +#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_OPTIMIZER_H_ +#define MINDSPORE_LITE_NNACL_FP32_GRAD_OPTIMIZER_H_ + +#include "nnacl/op_base.h" + +typedef struct ApplyMomentumParameter { + OpParameter op_parameter_; + bool use_locking_; + bool use_nesterov_; + float grad_scale_; +} ApplyMomentumParameter; + +typedef struct SgdParameter { + OpParameter op_parameter_; + float dampening_; + bool use_nesterov_; + float weight_decay_; +} SgdParameter; + +#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_OPTIMIZER_H_ diff --git a/mindspore/lite/nnacl/fp32_grad/pack_ext.c b/mindspore/lite/nnacl/fp32_grad/pack_ext.c index fd11c3da8b..645ad8dc2d 100644 --- a/mindspore/lite/nnacl/fp32_grad/pack_ext.c +++ b/mindspore/lite/nnacl/fp32_grad/pack_ext.c @@ -20,10 +20,8 @@ static int is_a_ge_zero_and_a_lt_b(int a, int b) { return (unsigned)(a) < (unsigned)(b); } void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param) { - const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_l_; - // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; - const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_u_; - // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; + const int pad_left = conv_param->pad_l_; + const int pad_up = conv_param->pad_u_; const int stride_h = conv_param->stride_h_; const int stride_w = conv_param->stride_w_; @@ -39,10 +37,11 @@ void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param const int output_h = conv_param->output_h_; const int output_w = conv_param->output_w_; + const int channels = conv_param->input_channel_ / conv_param->group_; const int tot_channels = conv_param->input_channel_; - int /*channel,*/ kernel_row, kernel_col, output_rows, output_col; + int kernel_row, kernel_col, output_rows, output_col; int row_stride_offset = 0; @@ -71,11 +70,9 @@ void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param } // output matrix is (kernel_h*kernel_w*channels)X(output_h*output_w) -void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param) { - const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_l_; - // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; - const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_u_; - // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; +void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose) { + const int pad_left = conv_param->pad_l_; + const int pad_up = conv_param->pad_u_; const int stride_h = conv_param->stride_h_; const int stride_w = conv_param->stride_w_; @@ -86,38 +83,67 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param const int kernel_h = conv_param->kernel_h_; const int kernel_w = conv_param->kernel_w_; - const int in_height = conv_param->input_h_; - const int in_width = conv_param->input_w_; + const int in_height = (transpose) ? conv_param->output_h_ : conv_param->input_h_; + const int in_width = (transpose) ? conv_param->output_w_ : conv_param->input_w_; - const int output_h = conv_param->output_h_; - const int output_w = conv_param->output_w_; - const int channels = conv_param->input_channel_ / conv_param->group_; - const int tot_channels = conv_param->input_channel_; + const int output_h = (transpose) ? conv_param->input_h_ : conv_param->output_h_; + const int output_w = (transpose) ? conv_param->input_w_ : conv_param->output_w_; + const int tot_channels = (transpose) ? 
conv_param->output_channel_ : conv_param->input_channel_; + const int channels = tot_channels / conv_param->group_; int channel, kernel_row, kernel_col, output_rows, output_col; - for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - for (channel = 0; channel < channels; channel++) { - int input_row = -pad_up + kernel_row * dilation_h; - for (output_rows = output_h; output_rows; output_rows--) { - if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) { - for (output_col = output_w; output_col; output_col--) { - *(data_row++) = 0; + if (transpose) { + for (channel = 0; channel < channels; channel++) { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_row = -pad_up + kernel_row * dilation_h; + for (output_rows = output_h; output_rows; output_rows--) { + if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) { + for (output_col = output_w; output_col; output_col--) { + *(data_row++) = 0; + } + } else { + int input_col = -pad_left + kernel_col * dilation_w; + for (output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels + channel; + *(data_row++) = in_data[offset]; + } else { + *(data_row++) = 0; + } + input_col += stride_w; + } } - } else { - int input_col = -pad_left + kernel_col * dilation_w; - for (output_col = output_w; output_col; output_col--) { - if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) { - const int offset = (input_row * in_width + input_col) * tot_channels + channel; - *(data_row++) = in_data[offset]; - } else { + input_row += stride_h; + } + } + } + } + } else { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + for (channel = 0; channel < channels; channel++) { + int input_row = -pad_up + kernel_row * dilation_h; + for (output_rows = output_h; output_rows; output_rows--) { + if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) { + for (output_col = output_w; output_col; output_col--) { *(data_row++) = 0; } - input_col += stride_w; + } else { + int input_col = -pad_left + kernel_col * dilation_w; + for (output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels + channel; + *(data_row++) = in_data[offset]; + } else { + *(data_row++) = 0; + } + input_col += stride_w; + } } + input_row += stride_h; } - input_row += stride_h; } } } @@ -125,10 +151,8 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param } void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) { - const int pad_left = /*conv_param->pad_l_*/ conv_param->pad_l_; - // const int pad_right = /*conv_param->pad_r_*/conv_param->pad_w_; - const int pad_up = /*conv_param->pad_u_*/ conv_param->pad_u_; - // const int pad_down = /*conv_param->pad_d/*/conv_param->pad_h_; + const int pad_left = conv_param->pad_l_; + const int pad_up = conv_param->pad_u_; const int stride_h = conv_param->stride_h_; const int stride_w = conv_param->stride_w_; diff --git a/mindspore/lite/nnacl/fp32_grad/pack_ext.h b/mindspore/lite/nnacl/fp32_grad/pack_ext.h index 1eb1e4593a..aa5f33faa7 100644 --- a/mindspore/lite/nnacl/fp32_grad/pack_ext.h +++ b/mindspore/lite/nnacl/fp32_grad/pack_ext.h @@ -23,7 +23,7 @@ extern "C" { #endif void 
im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param); -void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param); +void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose); void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param); #ifdef __cplusplus } diff --git a/mindspore/lite/nnacl/fp32_grad/pooling_grad.c b/mindspore/lite/nnacl/fp32_grad/pooling_grad.c index 87d55504df..ddf1b197cc 100644 --- a/mindspore/lite/nnacl/fp32_grad/pooling_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/pooling_grad.c @@ -17,7 +17,7 @@ #include #include "nnacl/fp32_grad/pooling_grad.h" -void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param) { +void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id) { int stride_w = pooling_param->stride_w_; int stride_h = pooling_param->stride_h_; int pad_w = pooling_param->pad_l_; @@ -41,7 +41,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter for (uint16_t yh = 0; yh < output_h; yh++) { for (uint16_t yw = 0; yw < output_w; yw++) { for (uint16_t ic = 0; ic < channel; ic++) { - int idx = (yw + yh * output_w) * channel + ic; // (ic*in_h*in_w) + (in_w*yh) + yw; + int idx = (yw + yh * output_w) * channel + ic; float delta = inPtr[idx] / kk; for (int32_t kh = 0; kh < win_h; kh++) { int xh = yh * stride_h + kh - pad_h; @@ -63,7 +63,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter } void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy_ptr, float *output_ptr, - PoolingParameter *pooling_param) { + PoolingParameter *pooling_param, int task_id) { int stride_w = pooling_param->stride_w_; int stride_h = pooling_param->stride_h_; int pad_w = pooling_param->pad_l_; diff --git a/mindspore/lite/nnacl/fp32_grad/pooling_grad.h b/mindspore/lite/nnacl/fp32_grad/pooling_grad.h index 80fd98ccbd..005f13384a 100644 --- a/mindspore/lite/nnacl/fp32_grad/pooling_grad.h +++ b/mindspore/lite/nnacl/fp32_grad/pooling_grad.h @@ -22,9 +22,9 @@ #ifdef __cplusplus extern "C" { #endif -void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param); +void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter *pooling_param, int task_id); void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy_ptr, float *output_ptr, - PoolingParameter *pooling_param); + PoolingParameter *pooling_param, int task_id); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs index 3705b2ad0a..eb47373c9d 100644 --- a/mindspore/lite/schema/model.fbs +++ b/mindspore/lite/schema/model.fbs @@ -207,6 +207,7 @@ union PrimitiveType { LshProjection, HashtableLookup, SkipGram, + DeConv2DGradFilter, CustomPredict, CustomNormalize, CustomExtractFeatures, @@ -215,6 +216,7 @@ union PrimitiveType { Rfft, FftReal, FftImag, + Sgd, } enum QuantType: int { diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index 4928f59c72..db7440a0da 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -407,6 +407,27 @@ table DeConv2D { hasBias: bool = false; activationType: ActivationType = 0; } + +table DeConv2DGradFilter { + format: Format = 0; + group: int; + channelIn: int; + channelOut: int; + kernelW: int; + kernelH: int; + strideW: int; + strideH: int; + padMode: PadMode; + padUp: 
int;
+  padDown: int;
+  padLeft: int;
+  padRight: int;
+  dilateW: int;
+  dilateH: int;
+  hasBias: bool = false;
+  activationType: ActivationType = 0;
+}
+
 table BNGrad {
     eps : float;
     momentum: float;
@@ -884,6 +905,11 @@ table ApplyMomentum {
     useNesterov: bool;
 }
+table Sgd {
+    weightDecay: float;
+    dampening: float;
+    useNesterov: bool;
+}
 table Where{
     condition: [bool];
diff --git a/mindspore/lite/src/common/file_utils_ext.cc b/mindspore/lite/src/common/file_utils_ext.cc
index e3043b456a..49e5f7a369 100644
--- a/mindspore/lite/src/common/file_utils_ext.cc
+++ b/mindspore/lite/src/common/file_utils_ext.cc
@@ -45,7 +45,7 @@ int CompareRelativeOutput(float *output_data, std::string file_path) {
     return 1;
   }
   size_t output_num = output_size / sizeof(float);
-  int error = CompareOutputRelativeData(output_data, ground_truth, output_num);
+  float error = CompareOutputRelativeData(output_data, ground_truth, output_num);
   delete[] ground_truth;
   if (error > 1e-4) {
     return 1;
diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc
index 8ccd593a2d..623fef3d14 100644
--- a/mindspore/lite/src/lite_kernel.cc
+++ b/mindspore/lite/src/lite_kernel.cc
@@ -18,6 +18,22 @@
 #include
 namespace mindspore::kernel {
+
+void *LiteKernel::workspace_ = nullptr;
+
+void LiteKernel::AllocWorkspace(size_t size) {
+  if (size == 0) return;
+  workspace_ = malloc(size);
+  if (workspace_ == nullptr) {
+    MS_LOG(ERROR) << "failed to allocate " << size << " bytes";
+  }
+}
+
+void LiteKernel::FreeWorkspace() {
+  free(workspace_);
+  workspace_ = nullptr;
+}
+
 void LiteKernel::InitOutTensorRefCount() {
   for (auto *tensor : this->out_tensors_) {
     tensor->SetRefCount(this->out_kernels_.size());
diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index 2d9e2e0c7a..a8409b2417 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_LITE_SRC_LITE_KERNEL_H_
 #include
 #include
+#include
 #include "src/ops/primitive_c.h"
 #include "src/common/utils.h"
 #ifdef ENABLE_ARM
@@ -145,6 +146,11 @@ class LiteKernel {
   void set_desc(const KernelKey kernel_key) { desc_ = kernel_key; }
   const mindspore::lite::PrimitiveC *GetPrimitive() const { return primitive_; }
+  void SetWorkspaceSize(size_t value) { workspace_size_ = value; }
+  size_t GetWorkspaceSize() { return workspace_size_; }
+  static void AllocWorkspace(size_t size);
+  static void FreeWorkspace();
+  void *GetWorkspace() { return workspace_; }

 protected:
   bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()) && true; }
@@ -161,6 +167,8 @@ class LiteKernel {
   std::vector<LiteKernel *> out_kernels_;
   bool train_mode_ = false;
   bool is_model_output_ = false;
+  size_t workspace_size_ = 0;
+  static void *workspace_;
 };

 class SubGraphKernel : public LiteKernel {
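The workspace members added here give all kernels one shared scratch allocation: each kernel records its requirement via SetWorkspaceSize() (typically during Init/ReSize), and the caller allocates a single block sized to the largest requirement before executing, which is safe because the CPU kernels run one at a time. A self-contained sketch of that contract follows; the Kernel struct and sizes below are hypothetical stand-ins, not this patch's classes.

#include <algorithm>
#include <cstdlib>
#include <vector>

// Minimal stand-in for the LiteKernel workspace contract added by this patch:
// kernels declare a scratch size, one shared block is allocated for the max,
// and every kernel reads the same pointer (safe because kernels run serially).
struct Kernel {
  size_t workspace_size = 0;
  static void *workspace;
};
void *Kernel::workspace = nullptr;

int main() {
  std::vector<Kernel> kernels(3);
  kernels[0].workspace_size = 1024;
  kernels[1].workspace_size = 4096;  // largest requirement wins
  kernels[2].workspace_size = 512;

  size_t max_ws = 0;
  for (const auto &k : kernels) max_ws = std::max(max_ws, k.workspace_size);
  Kernel::workspace = std::malloc(max_ws);  // AllocWorkspace(max_ws)

  // ... run kernels one by one, each using Kernel::workspace as scratch ...

  std::free(Kernel::workspace);  // FreeWorkspace()
  Kernel::workspace = nullptr;
  return 0;
}

diff --git a/mindspore/lite/src/ops/apply_momentum.cc b/mindspore/lite/src/ops/apply_momentum.cc
index 12a061522c..14918d9699 100644
--- a/mindspore/lite/src/ops/apply_momentum.cc
+++ b/mindspore/lite/src/ops/apply_momentum.cc
@@ -17,6 +17,10 @@
 namespace mindspore {
 namespace lite {
 #ifdef PRIMITIVE_WRITEABLE
+float ApplyMomentum::GetGradientScale() const { return this->primitive_->value.AsApplyMomentum()->gradientScale; }
+bool ApplyMomentum::GetUseLocking() const { return this->primitive_->value.AsApplyMomentum()->useLocking; }
+bool ApplyMomentum::GetUseNesterov() const { return this->primitive_->value.AsApplyMomentum()->useNesterov; }
+
 int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {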
   if (this->primitive_ == nullptr) {
     this->primitive_ = new (std::nothrow) schema::PrimitiveT;
@@ -36,6 +40,10 @@ int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
+    attr->gradientScale = GetValue<float>(prim.GetAttr("gradient_scale"));
+    attr->useLocking = GetValue<bool>(prim.GetAttr("use_locking"));
+    attr->useNesterov = GetValue<bool>(prim.GetAttr("use_nesterov"));
+
     this->primitive_->value.value = attr.release();
     if (this->primitive_->value.value == nullptr) {
       MS_LOG(ERROR) << "new primitiveT value failed";
@@ -45,6 +53,10 @@ int ApplyMomentum::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
   return RET_OK;
 }
 #else
+float ApplyMomentum::GetGradientScale() const { return this->primitive_->value_as_ApplyMomentum()->gradientScale(); }
+bool ApplyMomentum::GetUseLocking() const { return this->primitive_->value_as_ApplyMomentum()->useLocking(); }
+bool ApplyMomentum::GetUseNesterov() const { return this->primitive_->value_as_ApplyMomentum()->useNesterov(); }
+
 int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
   MS_ASSERT(nullptr != primitive);
   MS_ASSERT(nullptr != fbb);
@@ -53,7 +65,7 @@ int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatb
     MS_LOG(ERROR) << "value_as_ApplyMomentum return nullptr";
     return RET_ERROR;
   }
-  auto val_offset = schema::CreateApplyMomentum(*fbb);
+  auto val_offset = schema::CreateApplyMomentum(*fbb, attr->gradientScale(), attr->useLocking(), attr->useNesterov());
   auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_ApplyMomentum, val_offset.o);
   fbb->Finish(prim_offset);
   return RET_OK;
@@ -62,7 +74,7 @@ int ApplyMomentum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatb
 int ApplyMomentum::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
   if (5 != inputs.size()) {
-    MS_LOG(ERROR) << "ApplyMomentum should have at 5 input tensors";
+    MS_LOG(ERROR) << "ApplyMomentum should have 5 input tensors";
     return RET_ERROR;
   }
@@ -76,6 +88,7 @@ int ApplyMomentum::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
   if (!outputs.empty()) {
     auto *out = outputs.front();
     MS_ASSERT(out != nullptr);
     out->set_data_type(inputs[0]->data_type());
     out->SetFormat(inputs[0]->GetFormat());
+    out->set_shape({1});
   }

   return RET_OK;
diff --git a/mindspore/lite/src/ops/apply_momentum.h b/mindspore/lite/src/ops/apply_momentum.h
index 67ef1b5fc8..4f3d96aef3 100644
--- a/mindspore/lite/src/ops/apply_momentum.h
+++ b/mindspore/lite/src/ops/apply_momentum.h
@@ -39,6 +39,9 @@ class ApplyMomentum : public PrimitiveC {
   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
   int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
+  float GetGradientScale() const;
+  bool GetUseLocking() const;
+  bool GetUseNesterov() const;
 };
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/ops/bias_grad.cc b/mindspore/lite/src/ops/bias_grad.cc
index d561e42503..6da4712224 100644
--- a/mindspore/lite/src/ops/bias_grad.cc
+++ b/mindspore/lite/src/ops/bias_grad.cc
@@ -89,6 +89,7 @@ int BiasGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
   auto *out = outputs.front();
   MS_ASSERT(in0 != nullptr);
   MS_ASSERT(out != nullptr);
+
   auto inshape = in0->shape();
   int ndim = inshape.size();
   for (int i = 0; i < ndim - 1; i++) {
diff --git a/mindspore/lite/src/ops/bn_grad.cc b/mindspore/lite/src/ops/bn_grad.cc
index 3ee696ca94..8b6ebb321b 100644
--- a/mindspore/lite/src/ops/bn_grad.cc
+++ b/mindspore/lite/src/ops/bn_grad.cc
@@ -75,7 +75,7 @@ float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps()
 float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); }
 #endif
 int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
-  if (5 != inputs.size()) {
-    MS_LOG(ERROR) << "BNGrad should have five inputs";
+  if (6 != inputs.size()) {
+    MS_LOG(ERROR) << "BNGrad should have six inputs";
     return RET_ERROR;
   }
@@ -85,6 +85,7 @@ int BNGrad::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
+
   outputs[0]->set_shape(in->shape());
   outputs[1]->set_shape(scale->shape());
   outputs[2]->set_shape(scale->shape());
diff --git a/mindspore/lite/src/ops/bn_grad.h b/mindspore/lite/src/ops/bn_grad.h
index beb794e795..45120ca58a 100644
--- a/mindspore/lite/src/ops/bn_grad.h
+++ b/mindspore/lite/src/ops/bn_grad.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
-#ifndef LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_H_
-#define LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_H_
+#ifndef MINDSPORE_LITE_SRC_OPS_BN_GRAD_H_
+#define MINDSPORE_LITE_SRC_OPS_BN_GRAD_H_

 #include
 #include
@@ -44,4 +44,4 @@ class BNGrad : public PrimitiveC {
 }  // namespace lite
 }  // namespace mindspore

-#endif  // LITE_MINDSPORE_LITE_C_OPS_B_N_GRAD_INPUT_H_
+#endif  // MINDSPORE_LITE_SRC_OPS_BN_GRAD_H_
diff --git a/mindspore/lite/src/ops/fused_batchnorm.cc b/mindspore/lite/src/ops/fused_batchnorm.cc
index 5a05680d26..a08451af54 100644
--- a/mindspore/lite/src/ops/fused_batchnorm.cc
+++ b/mindspore/lite/src/ops/fused_batchnorm.cc
@@ -73,5 +73,20 @@ float FusedBatchNorm::GetMomentum() const { return this->primitive_->value_as_Fu
 int FusedBatchNorm::GetSpatial() const { return this->primitive_->value_as_FusedBatchNorm()->spatial(); }
 #endif

+int FusedBatchNorm::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
+  for (size_t i = 0; i < inputs_.size(); i++) {
+    if (outputs_.size() <= i) break;
+    outputs_.at(i)->set_shape(inputs_.at(i)->shape());
+    outputs_.at(i)->set_data_type(inputs_.at(i)->data_type());
+    outputs_.at(i)->SetFormat(inputs_.at(i)->GetFormat());
+  }
+  if (outputs_.size() > 5) {
+    outputs_.at(5)->set_data_type(inputs_.at(0)->data_type());
+    outputs_.at(5)->SetFormat(inputs_.at(0)->GetFormat());
+    outputs_.at(5)->set_shape({1});
+  }
+  return RET_OK;
+}
+
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/ops/fused_batchnorm.h b/mindspore/lite/src/ops/fused_batchnorm.h
index 2707bd9188..c73b083942 100644
--- a/mindspore/lite/src/ops/fused_batchnorm.h
+++ b/mindspore/lite/src/ops/fused_batchnorm.h
@@ -39,6 +39,7 @@ class FusedBatchNorm : public PrimitiveC {
   int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override;
 #endif
+  int InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) override;
   float GetEpsilon() const;
   float GetMomentum() const;
   int GetSpatial() const;
diff --git a/mindspore/lite/src/ops/pooling_grad.cc b/mindspore/lite/src/ops/pooling_grad.cc
index bc82884ea8..dc100de7d3 100644
--- a/mindspore/lite/src/ops/pooling_grad.cc
+++ b/mindspore/lite/src/ops/pooling_grad.cc
@@ -145,7 +145,15 @@ int PoolingGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuf
 #endif

 int PoolingGrad::InferShape(std::vector<lite::Tensor *> inputs_, std::vector<lite::Tensor *> outputs_) {
-  MS_ASSERT(this->primitive != nullptr);
+  if (3 != inputs_.size()) {
+    MS_LOG(ERROR) << "PoolingGrad should have 3 inputs";
+    return RET_ERROR;
+  }
+  if (1 != outputs_.size()) {
+    MS_LOG(ERROR) << "PoolingGrad should have one output";
+    return RET_ERROR;
+  }
+
   auto input = inputs_.at(0);
   MS_ASSERT(input != nullptr);
   int input_h = input->shape().at(1);
diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc
index 42fc24647b..87662121f4 100644
--- a/mindspore/lite/src/ops/primitive_c.cc
+++ b/mindspore/lite/src/ops/primitive_c.cc
@@ -151,6 +151,7 @@
 #include "src/ops/depend.h"
 #include "src/ops/flatten_grad.h"
 #include "src/ops/log_grad.h"
+#include "src/ops/sgd.h"
 #endif

 namespace mindspore {
@@ -384,7 +385,7 @@ std::shared_ptr<PrimitiveC> PrimitiveC::Create(const Primitive &prim, const std::vector<AnfNodePtr> &inputs,
     return NewPrimitiveC(prim, inputs, quantType);
   } else if (op_type == "Flatten") {
     return NewPrimitiveC<Flatten>(prim, inputs, quantType);
-  } else if (op_type == "FusedBatchNorm") {
+  } else if ((op_type == "FusedBatchNorm") || (op_type == "FusedBatchNormEx")) {
     return NewPrimitiveC<FusedBatchNorm>(prim, inputs, quantType);
   } else if (op_type == "make_tuple")
{ return NewPrimitiveC(prim, inputs, quantType); @@ -452,7 +453,7 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Conv2DBackpropInput") { return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "BatchNormGrad") { + } else if ((op_type == "BatchNormGrad") || (op_type == "FusedBatchNormGradEx")) { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "FlattenGrad") { return NewPrimitiveC(prim, inputs, quantType); @@ -460,6 +461,10 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Tile") { return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "PowerGrad") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "SGD") { + return NewPrimitiveC(prim, inputs, quantType); #else } else if (op_type == "Conv2DBackpropInput") { return NewPrimitiveC(prim, inputs, quantType); @@ -731,6 +736,8 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { return new NegGrad(primitive); case schema::PrimitiveType_LogGrad: return new LogGrad(primitive); + case schema::PrimitiveType_Sgd: + return new Sgd(primitive); #endif default: @@ -995,6 +1002,8 @@ PrimitiveC *PrimitiveC::Create(const schema::Primitive *primitive) { return NewPrimitiveC(primitive); case schema::PrimitiveType_LogGrad: return NewPrimitiveC(primitive); + case schema::PrimitiveType_Sgd: + return NewPrimitiveC(primitive); #endif default: MS_LOG(ERROR) << "Unsupported primitive type in Create : " << schema::EnumNamePrimitiveType(op_type); diff --git a/mindspore/lite/src/ops/sgd.cc b/mindspore/lite/src/ops/sgd.cc new file mode 100644 index 0000000000..038d1e0d68 --- /dev/null +++ b/mindspore/lite/src/ops/sgd.cc @@ -0,0 +1,97 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "src/ops/sgd.h"
+namespace mindspore {
+namespace lite {
+#ifdef PRIMITIVE_WRITEABLE
+float Sgd::GetWeightDecay() const { return this->primitive_->value.AsSgd()->weightDecay; }
+float Sgd::GetDampening() const { return this->primitive_->value.AsSgd()->dampening; }
+bool Sgd::GetUseNesterov() const { return this->primitive_->value.AsSgd()->useNesterov; }
+
+int Sgd::UnPackAttr(const Primitive &prim, const std::vector<AnfNodePtr> &inputs) {
+  if (this->primitive_ == nullptr) {
+    this->primitive_ = new (std::nothrow) schema::PrimitiveT;
+    if (this->primitive_ == nullptr) {
+      MS_LOG(ERROR) << "new primitiveT failed";
+      return RET_ERROR;
+    }
+    this->primitive_->value.type = schema::PrimitiveType_Sgd;
+  }
+  if (this->primitive_->value.type != schema::PrimitiveType_Sgd) {
+    MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type;
+    return RET_ERROR;
+  }
+  if (this->primitive_->value.value == nullptr) {
+    auto attr = std::make_unique<schema::SgdT>();
+    if (attr == nullptr) {
+      MS_LOG(ERROR) << "new primitiveT value failed";
+      return RET_ERROR;
+    }
+    attr->weightDecay = GetValue<float>(prim.GetAttr("weight_decay"));
+    attr->dampening = GetValue<float>(prim.GetAttr("dampening"));
+    attr->useNesterov = GetValue<bool>(prim.GetAttr("nesterov"));
+
+    this->primitive_->value.value = attr.release();
+    if (this->primitive_->value.value == nullptr) {
+      MS_LOG(ERROR) << "new primitiveT value failed";
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+#else
+float Sgd::GetWeightDecay() const { return this->primitive_->value_as_Sgd()->weightDecay(); }
+float Sgd::GetDampening() const { return this->primitive_->value_as_Sgd()->dampening(); }
+bool Sgd::GetUseNesterov() const { return this->primitive_->value_as_Sgd()->useNesterov(); }
+
+int Sgd::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) {
+  MS_ASSERT(nullptr != primitive);
+  MS_ASSERT(nullptr != fbb);
+  auto attr = primitive->value_as_Sgd();
+  if (attr == nullptr) {
+    MS_LOG(ERROR) << "value_as_Sgd return nullptr";
+    return RET_ERROR;
+  }
+  auto val_offset = schema::CreateSgd(*fbb, attr->weightDecay(), attr->dampening(), attr->useNesterov());
+  auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Sgd, val_offset.o);
+  fbb->Finish(prim_offset);
+  return RET_OK;
+}
+#endif
+
+int Sgd::InferShape(std::vector<lite::Tensor *> inputs, std::vector<lite::Tensor *> outputs) {
+  if (6 != inputs.size()) {
+    MS_LOG(ERROR) << "Sgd should have 6 input tensors";
+    return RET_ERROR;
+  }
+
+  if (inputs[0]->ElementsNum() != inputs[1]->ElementsNum() || inputs[0]->ElementsNum() != inputs[3]->ElementsNum() ||
+      inputs[2]->ElementsNum() != 1 || inputs[4]->ElementsNum() != 1) {
+    MS_LOG(ERROR) << "invalid input data size!";
+    return RET_ERROR;
+  }
+  if (!outputs.empty()) {
+    auto *out = outputs.front();
+    MS_ASSERT(out != nullptr);
+    out->set_data_type(inputs[0]->data_type());
+    out->SetFormat(inputs[0]->GetFormat());
+    out->set_shape({1});
+  }
+
+  return RET_OK;
+}
+}  // namespace lite
+}  // namespace mindspore
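The attribute set of the new Sgd primitive (weightDecay, dampening, useNesterov) matches the conventional SGD-with-momentum update. The following standalone C sketch shows the per-element step those attributes imply; this is an assumption for illustration, since the authoritative math lives in the new src/runtime/kernel/arm/fp32_grad/sgd.cc kernel, whose diff is not shown in this hunk.

#include <stdio.h>

/* Sketch of one SGD step with momentum, weight decay, dampening, and optional
 * Nesterov, mirroring the Sgd primitive's attributes. Assumed tensor roles
 * (from Sgd::InferShape): weight, gradient, and accumulate are same-sized;
 * learning_rate and moment are the scalar inputs. */
static void sgd_step(float *weight, const float *gradient, float *accumulate, int n, float learning_rate,
                     float moment, float dampening, float weight_decay, int nesterov) {
  for (int i = 0; i < n; ++i) {
    float grad = gradient[i] + weight_decay * weight[i]; /* L2 weight-decay term */
    accumulate[i] = moment * accumulate[i] + (1.0f - dampening) * grad;
    weight[i] -= (nesterov ? grad + moment * accumulate[i] : accumulate[i]) * learning_rate;
  }
}

int main(void) {
  float w[2] = {1.0f, -2.0f}, g[2] = {0.5f, 0.25f}, acc[2] = {0.0f, 0.0f};
  sgd_step(w, g, acc, 2, 0.01f, 0.9f, 0.0f, 0.0f, 0);
  printf("w = {%f, %f}\n", w[0], w[1]);
  return 0;
}

diff --git a/mindspore/lite/src/ops/sgd.h b/mindspore/lite/src/ops/sgd.h
new file mode 100644
index 0000000000..f5b6326ee3
--- /dev/null
+++ b/mindspore/lite/src/ops/sgd.h
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2019-2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.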
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_OPS_SGD_H_ +#define MINDSPORE_LITE_SRC_OPS_SGD_H_ + +#include +#include +#include +#include + +#include "src/ops/primitive_c.h" + +namespace mindspore { +namespace lite { +class Sgd : public PrimitiveC { + public: +#ifdef PRIMITIVE_WRITEABLE + MS_DECLARE_PARENT(Sgd, PrimitiveC); + Sgd() = default; + explicit Sgd(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; +#else + Sgd() = default; + + int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; +#endif + int InferShape(std::vector inputs_, std::vector outputs_) override; + float GetWeightDecay() const; + float GetDampening() const; + bool GetUseNesterov() const; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_SRC_OPS_SGD_H_ diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc index f9d93610c5..3c4f5b631e 100644 --- a/mindspore/lite/src/populate_parameter.cc +++ b/mindspore/lite/src/populate_parameter.cc @@ -633,6 +633,7 @@ OpParameter *PopulateFusedBatchNorm(const mindspore::lite::PrimitiveC *primitive auto param = reinterpret_cast(const_cast(primitive)); batch_norm_param->epsilon_ = param->GetEpsilon(); + batch_norm_param->momentum_ = param->GetMomentum(); batch_norm_param->fused_ = true; return reinterpret_cast(batch_norm_param); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc index bd97b59ac4..4741ed5c29 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.cc @@ -37,6 +37,14 @@ void FusedBatchnormCPUKernel::FreeScaleAndOffset() { free(offset_); offset_ = nullptr; } + if (save_mean_ != nullptr) { + free(save_mean_); + save_mean_ = nullptr; + } + if (save_variance_ != nullptr) { + free(save_variance_); + save_variance_ = nullptr; + } } int FusedBatchnormCPUKernel::InitConstTensor() { @@ -49,8 +57,11 @@ int FusedBatchnormCPUKernel::InitConstTensor() { offset_ = malloc(offset->Size()); mean_ = malloc(mean->Size()); variance_ = malloc(variance->Size()); + save_mean_ = malloc(mean->Size()); + save_variance_ = malloc(variance->Size()); - if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) { + if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr || save_mean_ == nullptr || + save_variance_ == nullptr) { FreeMeanAndVariance(); FreeScaleAndOffset(); MS_LOG(ERROR) << "Memory allocation failed"; @@ -60,6 +71,15 @@ int FusedBatchnormCPUKernel::InitConstTensor() { memcpy(offset_, offset->MutableData(), offset->Size()); memcpy(mean_, mean->MutableData(), mean->Size()); memcpy(variance_, variance->MutableData(), variance->Size()); + memset(save_mean_, 0, mean->Size()); + memset(save_variance_, 0, variance->Size()); + if (out_tensors_.size() > 4) { + for (size_t i = 1; i < out_tensors_.size(); i++) { + auto *data = 
static_cast<float *>(out_tensors_[i]->MutableData());
+      std::fill(data, data + out_tensors_[i]->ElementsNum(), 0.f);
+    }
+  }
+
   return RET_OK;
 }
@@ -70,15 +90,23 @@ int FusedBatchnormCPUKernel::Run() {
     return ret;
   }
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
-  if (is_train()) {
+  if (is_train() && in_tensors_.size() >= 5) {
     float *in = static_cast<float *>(in_tensors_[0]->MutableData());
-    float *run_mean = static_cast<float *>(out_tensors_[1]->MutableData());
-    float *run_var = static_cast<float *>(out_tensors_[2]->MutableData());
-    float *save_mean = static_cast<float *>(out_tensors_[3]->MutableData());
-    float *save_inv_var = static_cast<float *>(out_tensors_[4]->MutableData());
-    std::fill(run_mean, run_mean + param->channel_, 0.f);
-    std::fill(run_var, run_var + param->channel_, 0.f);
-    FusedBatchNormFp32MeanVar(in, 0.9, run_mean, run_var, param, save_mean, save_inv_var);
+    float *scale = static_cast<float *>(in_tensors_[1]->MutableData());
+    float *bias = static_cast<float *>(in_tensors_[2]->MutableData());
+    float *mean = static_cast<float *>(in_tensors_[3]->MutableData());
+    float *var = static_cast<float *>(in_tensors_[4]->MutableData());
+    std::fill(mean, mean + in_tensors_[3]->ElementsNum(), 0.f);
+    std::fill(var, var + in_tensors_[4]->ElementsNum(), 0.f);
+    FusedBatchNormFp32MeanVar(in, mean, var, param, static_cast<float *>(save_mean_),
+                              static_cast<float *>(save_variance_));
+    memcpy(out_tensors_[3]->MutableData(), save_mean_, out_tensors_[3]->Size());
+    memcpy(out_tensors_[4]->MutableData(), save_variance_, out_tensors_[4]->Size());
+    memcpy(mean_, mean, in_tensors_[3]->Size());
+    memcpy(variance_, var, in_tensors_[4]->Size());
+    memcpy(scale_, scale, in_tensors_[1]->Size());
+    memcpy(offset_, bias, in_tensors_[2]->Size());
+    trained_ = true;  // trained at least once
   }
   ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
@@ -87,6 +115,24 @@ int FusedBatchnormCPUKernel::Run() {
   return ret;
 }

+void FusedBatchnormCPUKernel::eval() {
+  LiteKernel::eval();
+  if (trained_) {
+    float *run_mean = static_cast<float *>(in_tensors_[3]->MutableData());
+    float *run_var = static_cast<float *>(in_tensors_[4]->MutableData());
+    float *scale = static_cast<float *>(in_tensors_[1]->MutableData());
+    float *bias = static_cast<float *>(in_tensors_[2]->MutableData());
+    // Copy to input tensors for Model export
+    memcpy(run_mean, save_mean_, in_tensors_[3]->Size());
+    memcpy(run_var, save_variance_, in_tensors_[4]->Size());
+    // Copy to local variables
+    memcpy(mean_, run_mean, in_tensors_[3]->Size());
+    memcpy(variance_, run_var, in_tensors_[4]->Size());
+    memcpy(scale_, scale, in_tensors_[1]->Size());
+    memcpy(offset_, bias, in_tensors_[2]->Size());
+  }
+}
+
 int FusedBatchnormCPUKernel::DoExecute(int task_id) {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
   FusedBatchNormFp32(in_tensors_.at(0)->MutableData(), scale_, offset_, mean_, variance_, param, task_id,
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
index 615f1070d0..e1a42e2776 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h
@@ -29,6 +29,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
       : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); }

+  void eval() override;
   int ReSize() override;
   int Run() override;
   int InitConstTensor() override;
@@ -38,6 +39,9 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel {
   void FreeScaleAndOffset();

   void
*scale_ = nullptr; void *offset_ = nullptr; + void *save_mean_ = nullptr; + void *save_variance_ = nullptr; + bool trained_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc index 3559fd9f8e..12ede4a061 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/activation_grad.cc @@ -32,7 +32,13 @@ using mindspore::schema::ActivationType_RELU6; using mindspore::schema::PrimitiveType_ActivationGrad; namespace mindspore::kernel { -int ActivationGradCPUKernel::Init() { return RET_OK; } +int ActivationGradCPUKernel::Init() { + if (2 != in_tensors_.size()) { + MS_LOG(ERROR) << "ActivationGrad should have 2 input tensors"; + return RET_ERROR; + } + return RET_OK; +} int ActivationGradCPUKernel::ReSize() { return RET_OK; } @@ -42,22 +48,32 @@ int ActivationGradCPUKernel::DoActivation(int task_id) { auto output_addr = reinterpret_cast(out_tensors_.at(0)->MutableData()); int length = in_tensors_.at(0)->ElementsNum(); + int stride = UP_DIV(length, thread_count_); + int count = MSMIN(stride, length - stride * task_id); + auto error_code = RET_OK; if (param_act_grad_->type_ == schema::ActivationType_RELU) { - error_code = ReluGrad(yt_addr, input_addr, length, output_addr); + error_code = + ReluGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else if (param_act_grad_->type_ == schema::ActivationType_RELU6) { - error_code = Relu6Grad(yt_addr, input_addr, length, output_addr); + error_code = + Relu6Grad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else if (param_act_grad_->type_ == schema::ActivationType_LEAKY_RELU) { - error_code = LReluGrad(yt_addr, input_addr, length, output_addr, param_act_grad_->alpha_); + error_code = LReluGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, + output_addr + stride * task_id, param_act_grad_->alpha_); } else if (param_act_grad_->type_ == schema::ActivationType_SIGMOID) { - error_code = SigmoidGrad(yt_addr, input_addr, length, output_addr); + error_code = + SigmoidGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else if (param_act_grad_->type_ == schema::ActivationType_TANH) { - error_code = TanhGrad(yt_addr, input_addr, length, output_addr); + error_code = + TanhGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else if (param_act_grad_->type_ == schema::ActivationType_HSWISH) { - error_code = HSwishGrad(yt_addr, input_addr, length, output_addr); + error_code = + HSwishGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else if (param_act_grad_->type_ == schema::ActivationType_HSIGMOID) { - error_code = HSigmoidGrad(yt_addr, input_addr, length, output_addr); + error_code = + HSigmoidGrad(yt_addr + stride * task_id, input_addr + stride * task_id, count, output_addr + stride * task_id); } else { MS_LOG(ERROR) << "Activation type error"; return RET_ERROR; @@ -81,13 +97,13 @@ int ActivationGradRun(void *cdata, int task_id) { int ActivationGradCPUKernel::Run() { auto ret = Prepare(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Prepare failed."; + MS_LOG(ERROR) << "ActivationGradCPUKernel Prepare failed."; return ret; } - int error_code = 
ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, thread_count_); + int error_code = ParallelLaunch(this->context_->thread_pool_, ActivationGradRun, this, 1); if (error_code != RET_OK) { - MS_LOG(ERROR) << "Activation function error error_code[" << error_code << "]"; + MS_LOG(ERROR) << "Activation Grad function error error_code[" << error_code << "]"; return RET_ERROR; } return RET_OK; @@ -107,7 +123,7 @@ kernel::LiteKernel *CpuActivationGradFp32KernelCreator(const std::vectorInit(); if (ret != RET_OK) { - MS_LOG(ERROR) << "InferShape kernel failed, name: " << opParameter->name_ << ", type: " + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); delete kernel; return nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc index 46356b2209..8f53a60c3d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/apply_momentum.cc @@ -19,6 +19,7 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" #include "src/runtime/kernel/arm/fp32/nchw2nhwc.h" using mindspore::kernel::KERNEL_ARCH::kCPU; @@ -31,13 +32,7 @@ namespace mindspore::kernel { int ApplyMomentumCPUKernel::ReSize() { return RET_OK; } -int ApplyMomentumCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } - +int ApplyMomentumCPUKernel::Execute(int task_id) { auto weight = reinterpret_cast(in_tensors_[0]->MutableData()); auto accumulate = reinterpret_cast(in_tensors_[1]->MutableData()); float learning_rate = reinterpret_cast(in_tensors_[2]->MutableData())[0]; @@ -45,9 +40,41 @@ int ApplyMomentumCPUKernel::Run() { float moment = reinterpret_cast(in_tensors_[4]->MutableData())[0]; size_t elem_num = in_tensors_[0]->ElementsNum(); - for (size_t i = 0; i < elem_num; ++i) { - accumulate[i] = accumulate[i] * moment + gradient[i]; // * (1.0 - moment); - weight[i] -= accumulate[i] * learning_rate; + if (apply_momentum_param_->use_nesterov_) { + for (size_t i = 0; i < elem_num; ++i) { + accumulate[i] = accumulate[i] * moment + gradient[i]; + weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate; + } + } else { + for (size_t i = 0; i < elem_num; ++i) { + accumulate[i] = accumulate[i] * moment + gradient[i]; + weight[i] -= accumulate[i] * learning_rate; + } + } + return RET_OK; +} + +int ApplyMomentumRun(void *cdata, int task_id) { + auto applyMomentum_kernel = reinterpret_cast(cdata); + auto error_code = applyMomentum_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "apply Momentum run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ApplyMomentumCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "ApplyMomentumCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, ApplyMomentumRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Apply Momentum function error error_code[" << error_code << "]"; + return RET_ERROR; } return RET_OK; } @@ -77,6 +104,7 @@ kernel::LiteKernel *CpuApplyMomentumFp32KernelCreator(const 
std::vector #include "src/lite_kernel.h" +#include "nnacl/fp32_grad/optimizer.h" namespace mindspore::kernel { class ApplyMomentumCPUKernel : public LiteKernel { @@ -26,11 +27,17 @@ class ApplyMomentumCPUKernel : public LiteKernel { explicit ApplyMomentumCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + : LiteKernel(parameter, inputs, outputs, ctx, primitive), apply_momentum_param_(nullptr) { + apply_momentum_param_ = reinterpret_cast(parameter); + } ~ApplyMomentumCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); + + private: + ApplyMomentumParameter *apply_momentum_param_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc index b56fd8fcd2..980f22f381 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32_grad/reduce_grad.h" #include "nnacl/fp32_grad/arithmetic_grad.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -36,14 +37,13 @@ int ArithmeticGradCPUKernel::Init() { MS_ASSERT(dx2 != nullptr); if ((Type() == PrimitiveType_MulGrad) || (Type() == PrimitiveType_DivGrad)) { - // if (inShape0.size() < inShape1.size()) if (dx1->ElementsNum() < dx2->ElementsNum()) { if (Type() == PrimitiveType_MulGrad) arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul2L; else if (Type() == PrimitiveType_DivGrad) arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv2L; - } else if (dx2->ElementsNum() < dx1->ElementsNum()) { // if (inShape0.size() > inShape1.size()) + } else if (dx2->ElementsNum() < dx1->ElementsNum()) { if (Type() == PrimitiveType_MulGrad) arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMul1L; else if (Type() == PrimitiveType_DivGrad) @@ -157,7 +157,6 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv1L(float *dy, int dy_size, float ReduceSumByAxes(tile_data2, arithmeticParameter_->in_shape0_, dx2, arithmeticParameter_->in_shape1_, arithmeticParameter_->ndim_); for (int i = 0; i < dx2_size; i++) dx2[i] = -dx2[i]; - // ReduceNegSumPrefix(tile_data2, dy_size, dx2, dx2_size); //then reduce into dx2 // broadcasting x2 BroadcastDiv(dy, x2_data, tile_data0, tile_data1, dx1, dy_size, arithmeticParameter_); // broadcast directly to dx1 @@ -180,7 +179,7 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float int ArithmeticGradCPUKernel::ReSize() { return RET_OK; } -int ArithmeticGradCPUKernel::Run() { +int ArithmeticGradCPUKernel::Execute(int task_id) { auto dy = reinterpret_cast(in_tensors_[0]->MutableData()); auto dx1 = reinterpret_cast(out_tensors_[0]->MutableData()); auto dx2 = reinterpret_cast(out_tensors_[1]->MutableData()); @@ -192,6 +191,30 @@ int ArithmeticGradCPUKernel::Run() { return RET_OK; } +int ArithmeticGradRun(void *cdata, int task_id) { + auto Arithmetic_kernel = reinterpret_cast(cdata); + auto error_code = Arithmetic_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "ArithmeticGradRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return 
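// Note: the Div gradients above implement the quotient rule under
// broadcasting; as a worked reference for y = x1 / x2 with upstream dy:
//   dx1 = dy / x2          (BroadcastDiv writes this directly into dx1)
//   dx2 = -dy * x1 / x2^2  (ReduceSumByAxes first collapses the broadcast
//                           axes, then the `dx2[i] = -dx2[i]` loop applies
//                           the sign flip)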
RET_OK; +} + +int ArithmeticGradCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ArithmeticGradCPUKernel Prepare failed."; + return ret; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, ArithmeticGradRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Arithmetic Grad function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuArithmeticGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h index 479b2d1e48..7f480daf4c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h @@ -68,6 +68,7 @@ class ArithmeticGradCPUKernel : public LiteKernel { int InferShape(); int ReSize() override; int Run() override; + int Execute(int task_id); private: void ArithmeticGradAdd(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc index 0b8cf33ade..0da128cf48 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.cc @@ -19,6 +19,7 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -43,14 +44,9 @@ int BiasGradCPUKernel::Init() { return RET_OK; } -int BiasGradCPUKernel::ReSize() { return 0; } +int BiasGradCPUKernel::ReSize() { return RET_OK; } -int BiasGradCPUKernel::Run() { - auto ret = Prepare(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Prepare failed."; - return RET_ERROR; - } +int BiasGradCPUKernel::Execute(int task_id) { auto in = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto out = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -69,6 +65,30 @@ int BiasGradCPUKernel::Run() { return RET_OK; } +int BiasGradRun(void *cdata, int task_id) { + auto bias_kernel = reinterpret_cast(cdata); + auto error_code = bias_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "bias error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int BiasGradCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "BiasGradCPUKernel Prepare failed."; + return RET_ERROR; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, BiasGradRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuBiasGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h index 7d874fc72f..43f4cf389b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bias_grad.h @@ -35,6 +35,7 @@ class BiasGradCPUKernel : public LiteKernel 
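// Note: every kernel in this patch follows the same Run/Execute split:
// Run() calls Prepare(), then ParallelLaunch dispatches a C-style callback
// that casts the opaque cdata pointer back to the kernel and forwards to
// Execute(task_id). A generic sketch of the pattern (the template is
// illustrative; the patch writes one named callback per kernel):
template <typename KernelT>
int KernelTrampoline(void *cdata, int task_id) {
  // ParallelLaunch only accepts a plain function pointer, so the kernel
  // object travels through the void *cdata argument.
  auto *kernel = reinterpret_cast<KernelT *>(cdata);
  return kernel->Execute(task_id);
}
// e.g. ParallelLaunch(ctx->thread_pool_, KernelTrampoline<BiasGradCPUKernel>,
//                     this, 1);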
{ int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: ArithmeticParameter *bias_param; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc index ce9bb95dd3..d9f422011e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc @@ -21,6 +21,7 @@ #include "src/kernel_registry.h" #include "nnacl/fp32_grad/batch_norm.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -33,23 +34,13 @@ namespace mindspore::kernel { int BNGradCPUKernel::Init() { auto *input_x = in_tensors_.at(1); int channels = input_x->shape().at(kNHWC_C); - workspace_size = 4 * channels; - workspace = new (std::nothrow) float[workspace_size]; - if (workspace == nullptr) { - MS_LOG(ERROR) << "new workspace fail!"; - return RET_ERROR; - } + SetWorkspaceSize(4 * channels * sizeof(float)); return RET_OK; } int BNGradCPUKernel::ReSize() { return RET_OK; } -int BNGradCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } +int BNGradCPUKernel::Execute(int task_id) { auto bn_param = reinterpret_cast(op_parameter_); auto *input_yt = in_tensors_.at(0); auto *input_x = in_tensors_.at(1); @@ -61,7 +52,9 @@ int BNGradCPUKernel::Run() { int channels = input_x->Channel(); int spatial = input_x->Height() * input_x->Width(); float eps = bn_param->epsilon_; - std::fill(workspace, workspace + workspace_size, 0.f); + + float *workspace = static_cast(GetWorkspace()); + std::fill(workspace, workspace + GetWorkspaceSize() / sizeof(*workspace), 0.f); float *mean = workspace; float *invar = mean + channels; float *dxhat_sum = invar + channels; @@ -82,6 +75,33 @@ int BNGradCPUKernel::Run() { return RET_OK; } +int BNGradRun(void *cdata, int task_id) { + auto bn_kernel = reinterpret_cast(cdata); + if (task_id == 0) { + auto error_code = bn_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "BNGradRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + } + return RET_OK; +} + +int BNGradCPUKernel::Run() { + // std::cout << "run succ" << std::endl; + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "BNGradCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, BNGradRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "BN function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuBNGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h index 4bbbce34a6..cc2b57b8cc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.h @@ -27,18 +27,12 @@ class BNGradCPUKernel : public LiteKernel { explicit BNGradCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : 
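// Note: BNGradCPUKernel now uses the shared kernel workspace instead of a
// heap buffer it owns: Init() reserves 4 * channels floats and Execute()
// carves them into four per-channel arrays. A sketch of the carve-up (the
// fourth name is an assumption; the hunk context cuts off after dxhat_sum):
inline void BnGradWorkspaceLayout(float *ws, int channels, float **mean,
                                  float **invar, float **dxhat_sum,
                                  float **dxhathat_sum /* assumed name */) {
  *mean = ws;                             // [C] per-channel mean
  *invar = *mean + channels;              // [C] 1/sqrt(var + eps)
  *dxhat_sum = *invar + channels;         // [C] first reduction buffer
  *dxhathat_sum = *dxhat_sum + channels;  // [C] second reduction buffer
}
// BNGradRun additionally guards on task_id == 0, so the computation stays
// single-threaded even though it goes through ParallelLaunch.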
LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr), workspace_size(0) {} - ~BNGradCPUKernel() override { - if (workspace) delete[] workspace; - } - + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~BNGradCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - - private: - float *workspace; - int workspace_size; + int Execute(int task_id); }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_BN_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc index 1c375bcbcf..3193971ef4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc @@ -18,6 +18,7 @@ #include "nnacl/fp32_grad/pack_ext.h" #include "nnacl/fp32_grad/gemm.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::RET_ERROR; @@ -25,6 +26,14 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { int ConvolutionTrainCPUKernel::Init() { + if (2 != in_tensors_.size()) { + MS_LOG(ERROR) << "Convolution should have two inputs"; + return RET_ERROR; + } + if (1 != out_tensors_.size()) { + MS_LOG(ERROR) << "Convolution should have one output"; + return RET_ERROR; + } auto conv_param_ = reinterpret_cast(op_parameter_); auto *input_x = in_tensors_.at(kInputIndex); auto *input_weight = in_tensors_.at(kWeightIndex); @@ -46,22 +55,13 @@ int ConvolutionTrainCPUKernel::Init() { int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_; - workspace = new (std::nothrow) float[ws_size]; - if (workspace == nullptr) { - MS_LOG(ERROR) << "new workspace fail!"; - return RET_ERROR; - } + SetWorkspaceSize(ws_size * sizeof(float)); return RET_OK; } int ConvolutionTrainCPUKernel::ReSize() { return RET_OK; } -int ConvolutionTrainCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } +int ConvolutionTrainCPUKernel::Execute(int task_id) { auto conv_param_ = reinterpret_cast(op_parameter_); auto *input_x = in_tensors_.at(kInputIndex); auto *input_w = in_tensors_.at(kWeightIndex); @@ -86,6 +86,7 @@ int ConvolutionTrainCPUKernel::Run() { int m = out_h * out_w; int n = out_ch / groups; int k = k_h * k_w * in_ch / groups; + float *workspace = static_cast(GetWorkspace()); memset(y_addr, 0, out_y->Size()); @@ -99,6 +100,31 @@ int ConvolutionTrainCPUKernel::Run() { gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch); } } + + return RET_OK; +} + +int ConvolutionTrainRun(void *cdata, int task_id) { + auto conv_kernel = reinterpret_cast(cdata); + auto error_code = conv_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "ConvolutionTrainRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ConvolutionTrainCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "ConvolutionTrainCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionTrainRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "conv train function error error_code[" << 
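// Note: the training convolution above runs one GEMM per (batch, group):
// m = out_h*out_w output pixels, n = out_ch/groups filters, and
// k = k_h*k_w*in_ch/groups unfolded patch entries, with gemm(0, 1, ...)
// computing C += A * B^T. A naive reference of what that call produces
// (row-major layouts as in the patch; function name illustrative):
void ConvGemmRef(const float *rows, const float *w, float *y, int m, int n,
                 int k, int out_ch) {
  // y[p][oc] += sum_kk rows[p][kk] * w[oc][kk]; the output row stride is
  // out_ch because all groups share the interleaved NHWC output tensor.
  for (int p = 0; p < m; ++p)
    for (int oc = 0; oc < n; ++oc)
      for (int kk = 0; kk < k; ++kk)
        y[p * out_ch + oc] += rows[p * k + kk] * w[oc * k + kk];
}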
error_code << "]"; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h index fee2a38f32..dd92d28183 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h @@ -26,17 +26,13 @@ class ConvolutionTrainCPUKernel : public LiteKernel { explicit ConvolutionTrainCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} - ~ConvolutionTrainCPUKernel() override { - if (workspace) delete[] workspace; - } + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~ConvolutionTrainCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - - private: - float *workspace; + int Execute(int task_id); }; kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc index 4c703e2874..cf54e4675b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32_grad/pack_ext.h" #include "nnacl/fp32_grad/gemm.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -50,26 +51,16 @@ int ConvolutionGradFilterCPUKernel::Init() { conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; - int ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * - conv_param->input_channel_ / conv_param->group_; - - workspace = new (std::nothrow) float[ws_size]; - if (workspace == nullptr) { - MS_LOG(ERROR) << "new workspace fail!"; - return RET_ERROR; - } + size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * + conv_param->input_channel_ / conv_param->group_; + SetWorkspaceSize(ws_size * sizeof(float)); return RET_OK; } int ConvolutionGradFilterCPUKernel::ReSize() { return RET_OK; } -int ConvolutionGradFilterCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } +int ConvolutionGradFilterCPUKernel::Execute(int task_id) { auto conv_param = reinterpret_cast(op_parameter_); auto *input_dy = in_tensors_.at(0); auto *input_x = in_tensors_.at(1); @@ -84,8 +75,8 @@ int ConvolutionGradFilterCPUKernel::Run() { int in_ch = conv_param->input_channel_; int in_h = conv_param->input_h_; int in_w = conv_param->input_w_; - int k_h = conv_param->kernel_h_; // out_dw->shape()[1]; - int k_w = conv_param->kernel_w_; // out_dw->shape()[2]; + int k_h = conv_param->kernel_h_; + int k_w = conv_param->kernel_w_; int batch = conv_param->output_batch_; int out_ch = conv_param->output_channel_; int groups = conv_param->group_; @@ -96,6 +87,8 @@ int ConvolutionGradFilterCPUKernel::Run() { int n = k_h * k_w * in_ch / groups; int k = out_ch / groups; + float *workspace = reinterpret_cast(GetWorkspace()); + // zero out pointer memset(dw_addr, 0, out_dw->Size()); 
@@ -104,15 +97,39 @@ int ConvolutionGradFilterCPUKernel::Run() { float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups); float *mat_b = workspace; float *mat_c = dw_addr + j * nweights / groups; - float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups); + float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups); - im2row_hwc(im, mat_b, conv_param); + im2row_hwc(im, mat_b, conv_param, false); gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n); } } return RET_OK; } +int ConvolutionGradFilterRun(void *cdata, int task_id) { + auto convfilter_kernel = reinterpret_cast(cdata); + auto error_code = convfilter_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "ConvolutionGradFilterRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ConvolutionGradFilterCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "ConvolutionGradFilterCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionGradFilterRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "conv filter function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuConvGradFilterFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h index afb3624935..a8eaefdafc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h @@ -26,17 +26,14 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel { explicit ConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} - ~ConvolutionGradFilterCPUKernel() override { - if (workspace) delete[] workspace; - } + + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~ConvolutionGradFilterCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - - private: - float *workspace = nullptr; + int Execute(int task_id); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc index c8a370c29f..57c94fc8f2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32_grad/pack_ext.h" #include "nnacl/fp32_grad/gemm.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -50,26 +51,16 @@ int ConvolutionGradInputCPUKernel::Init() { conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; - int ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * - conv_param->input_channel_ / 
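// Note: the filter gradient is the correlation of the output gradient with
// the unfolded input patches; per (batch, group) the gemm(1, 1, k, n, m, ...)
// call above accumulates exactly the loop below (group pointer offsets
// omitted; function name illustrative):
void ConvGradFilterRef(const float *dy, const float *rows, float *dw, int m,
                       int n, int k, int out_ch) {
  // dw[oc][j] += sum_p dy[p][oc] * rows[j][p], with k = out_ch/groups,
  // n = k_h*k_w*in_ch/groups and m = out_h*out_w.
  for (int oc = 0; oc < k; ++oc)
    for (int j = 0; j < n; ++j)
      for (int p = 0; p < m; ++p)
        dw[oc * n + j] += dy[p * out_ch + oc] * rows[j * m + p];
}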
conv_param->group_; + size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * + conv_param->input_channel_ / conv_param->group_; - workspace = new (std::nothrow) float[ws_size]; - if (workspace == nullptr) { - MS_LOG(ERROR) << "new workspace fail!"; - return RET_ERROR; - } + SetWorkspaceSize(ws_size * sizeof(float)); return RET_OK; } -int ConvolutionGradInputCPUKernel::ReSize() { return 0; } - -int ConvolutionGradInputCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } +int ConvolutionGradInputCPUKernel::ReSize() { return RET_OK; } +int ConvolutionGradInputCPUKernel::Execute(int task_id) { auto conv_param = reinterpret_cast(op_parameter_); auto *input_dy = in_tensors_.at(0); auto *input_w = in_tensors_.at(1); @@ -95,6 +86,7 @@ int ConvolutionGradInputCPUKernel::Run() { int m = out_h * out_w; int n = k_w * k_h * in_ch / groups; int k = out_ch / groups; + float *workspace = reinterpret_cast(GetWorkspace()); memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w); @@ -107,6 +99,32 @@ int ConvolutionGradInputCPUKernel::Run() { col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param); } } + + return RET_OK; +} + +int ConvolutionGradInputRun(void *cdata, int task_id) { + auto convinput_kernel = reinterpret_cast(cdata); + auto error_code = convinput_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "conv input error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int ConvolutionGradInputCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "ConvolutionGradInputCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionGradInputRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "bias function error error_code[" << error_code << "]"; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h index beb0cc1987..6bea61b59c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h @@ -26,17 +26,13 @@ class ConvolutionGradInputCPUKernel : public LiteKernel { explicit ConvolutionGradInputCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), workspace(nullptr) {} - ~ConvolutionGradInputCPUKernel() override { - if (workspace) delete[] workspace; - } + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~ConvolutionGradInputCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - - private: - float *workspace; + int Execute(int task_id); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc new file mode 100644 index 0000000000..67c86ae01b --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc @@ -0,0 
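// Note: the input gradient inverts im2row: a GEMM first expands dy into
// unfolded patch gradients, then col2im_hwc scatter-adds every patch entry
// back onto its source pixel, so overlapping windows accumulate. A naive
// NHWC col2im sketch, assuming unit dilation and symmetric padding (the
// nnacl version honors the full ConvParameter):
void Col2ImAccum(const float *rows, float *dx, int out_h, int out_w, int k_h,
                 int k_w, int in_ch, int stride, int pad, int in_h, int in_w) {
  for (int p = 0; p < out_h * out_w; ++p) {
    int oh = p / out_w, ow = p % out_w;
    const float *patch = rows + p * k_h * k_w * in_ch;
    for (int kh = 0; kh < k_h; ++kh)
      for (int kw = 0; kw < k_w; ++kw) {
        int ih = oh * stride - pad + kh, iw = ow * stride - pad + kw;
        if (ih < 0 || ih >= in_h || iw < 0 || iw >= in_w) continue;
        for (int c = 0; c < in_ch; ++c)
          dx[(ih * in_w + iw) * in_ch + c] +=
              patch[(kh * k_w + kw) * in_ch + c];
      }
  }
}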
+1,155 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h" +#include "src/kernel_registry.h" +#include "nnacl/pack.h" +#include "nnacl/fp32_grad/pack_ext.h" +#include "nnacl/fp32_grad/gemm.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DeConv2DGradFilter; + +namespace mindspore::kernel { +int DeConvolutionGradFilterCPUKernel::Init() { + // dy is in input 0 + // x is in input 1 + // dw is output 0 + + auto *x_tensor = in_tensors_.at(1); + MS_ASSERT(x_tensor != nullptr); + auto *dy_tensor = in_tensors_.at(0); + MS_ASSERT(dy_tensor != nullptr); + + auto conv_param = reinterpret_cast(op_parameter_); + conv_param->output_batch_ = dy_tensor->shape().at(kNHWC_N); + conv_param->input_batch_ = x_tensor->shape().at(kNHWC_N); + conv_param->input_h_ = x_tensor->shape().at(kNHWC_H); + conv_param->input_w_ = x_tensor->shape().at(kNHWC_W); + // assume OutCh|kh|kw|InCh + conv_param->input_channel_ = x_tensor->shape().at(kNHWC_C); + conv_param->output_channel_ = dy_tensor->shape().at(kNHWC_C); + + conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; + conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; + + int ws_size = conv_param->input_h_ * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * + conv_param->output_channel_ / conv_param->group_; + + SetWorkspaceSize(ws_size * sizeof(float)); + + return RET_OK; +} + +int DeConvolutionGradFilterCPUKernel::ReSize() { return RET_OK; } + +int DeConvolutionGradFilterCPUKernel::Execute(int task_id) { + auto conv_param = reinterpret_cast(op_parameter_); + auto *input_dy = in_tensors_.at(0); + auto *input_x = in_tensors_.at(1); + auto *out_dw = out_tensors_.at(0); + + auto x_addr = reinterpret_cast(input_x->MutableData()); + auto dy_addr = reinterpret_cast(input_dy->MutableData()); + auto dw_addr = reinterpret_cast(out_dw->MutableData()); + + int i, j; + int in_ch = conv_param->input_channel_; + int in_h = conv_param->input_h_; + int in_w = conv_param->input_w_; + int k_h = conv_param->kernel_h_; + int k_w = conv_param->kernel_w_; + int batch = conv_param->output_batch_; + int out_ch = conv_param->output_channel_; + int groups = conv_param->group_; + int out_h = conv_param->output_h_; + int out_w = conv_param->output_w_; + + int m = in_ch / groups; + int n = k_h * k_w * out_ch / groups; + int k = in_h * in_w; + + float *workspace = reinterpret_cast(GetWorkspace()); + // zero out pointer + memset(dw_addr, 0, out_dw->Size()); + for (i = 0; i < batch; ++i) { + for (j = 0; j < groups; ++j) { + float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)); + float *mat_b = workspace; + float *mat_c = dw_addr + j * m; + float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups)); + im2row_hwc(im, mat_b, 
conv_param, true); + gemm(0, 0, n, m, k, 1, mat_b, k, mat_a, in_ch, 1, mat_c, in_ch); + } + } + return RET_OK; +} + +int DeConvolutionGradFilterRun(void *cdata, int task_id) { + auto convfilter_kernel = reinterpret_cast(cdata); + auto error_code = convfilter_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "DeConvolutionGradFilterRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int DeConvolutionGradFilterCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, DeConvolutionGradFilterRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "conv filter function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +kernel::LiteKernel *CpuDeConvGradFilterFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, + OpParameter *opParameter, const lite::InnerContext *ctx, + const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + MS_ASSERT(opParameter != nullptr); + MS_ASSERT(desc.type == schema::PrimitiveType_DeConv2DGradFilter); + + auto *kernel = new (std::nothrow) DeConvolutionGradFilterCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "new kernel fail!"; + return nullptr; + } + + auto ret = kernel->Init(); + if (RET_OK != ret) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DeConv2DGradFilter, CpuDeConvGradFilterFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h new file mode 100644 index 0000000000..0737cb1009 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h @@ -0,0 +1,40 @@ +/** + * Copyright 2019 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
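// Note: for the transposed convolution the tensor roles swap: dy (the larger
// map) is the one unfolded, hence im2row_hwc(..., true), and x supplies the
// per-pixel coefficients. Per (batch, group) the gemm(0, 0, n, m, k, ...)
// call above therefore computes, with group offsets omitted:
//   dw[j][c] += sum_p rows[j][p] * x[p][c]
// where n = k_h*k_w*out_ch/groups, m = in_ch/groups and k = in_h*in_w; the
// in_ch leading dimensions keep x and dw channel-interleaved across groups.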
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DECONVOLUTION_GRAD_FILTER_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DECONVOLUTION_GRAD_FILTER_H_ + +#include +#include "src/lite_kernel.h" + +namespace mindspore::kernel { +class DeConvolutionGradFilterCPUKernel : public LiteKernel { + public: + explicit DeConvolutionGradFilterCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + ~DeConvolutionGradFilterCPUKernel() override {} + + int Init() override; + int ReSize() override; + int Run() override; + int Execute(int task_id); +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DECONVOLUTION_GRAD_FILTER_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h index dbc62434aa..26ca5156b8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/make_tuple.h @@ -36,6 +36,7 @@ class MakeTupleCPUKernel : public LiteKernel { int Init() override; int ReSize() override; int Run() override; + int DoActivation(int task_id); private: OpParameter *param; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc index 6be058706e..b5a3dd7758 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.cc @@ -20,6 +20,8 @@ #include "nnacl/fp32/pooling.h" #include "nnacl/fp32_grad/pooling_grad.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" +// #include "src/train/ops/train_ops.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -60,12 +62,7 @@ int PoolingGradCPUKernel::Init() { int PoolingGradCPUKernel::ReSize() { return RET_OK; } -int PoolingGradCPUKernel::Run() { - auto prepare_ret = Prepare(); - if (prepare_ret != RET_OK) { - MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; - return prepare_ret; - } +int PoolingGradCPUKernel::Execute(int task_id) { PoolingParameter *pool_param = reinterpret_cast(op_parameter_); auto input_ptr = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto output_ptr = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -73,9 +70,41 @@ int PoolingGradCPUKernel::Run() { if (pool_param->pool_mode_ == PoolMode_MaxPool) { auto dx_ptr = reinterpret_cast(in_tensors_.at(1)->MutableData()); auto dy_ptr = reinterpret_cast(in_tensors_.at(2)->MutableData()); - MaxPoolingGrad(input_ptr, dx_ptr, dy_ptr, output_ptr, pool_param); + MaxPoolingGrad(input_ptr, dx_ptr, dy_ptr, output_ptr, pool_param, task_id); } else { - AvgPoolingGrad(input_ptr, output_ptr, pool_param); + AvgPoolingGrad(input_ptr, output_ptr, pool_param, task_id); + } + return RET_OK; +} + +int PoolingGradImpl(void *cdata, int task_id) { + auto pooling = reinterpret_cast(cdata); + auto error_code = pooling->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Pooling Run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int PoolingGradCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "PoolingGradCPUKernel Prepare fail!ret: " << prepare_ret; + return 
prepare_ret; + } + + // clear output buffer before parallel run + PoolingParameter *pooling_param = reinterpret_cast(op_parameter_); + auto output_ptr = reinterpret_cast(out_tensors_.at(0)->MutableData()); + int size = + pooling_param->input_w_ * pooling_param->input_h_ * pooling_param->input_channel_ * pooling_param->output_batch_; + for (int i = 0; i < size; i++) output_ptr[i] = 0.0; + + int error_code = ParallelLaunch(this->context_->thread_pool_, PoolingGradImpl, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; + return RET_ERROR; } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h index 190ea68301..43f6ad79ec 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/pooling_grad.h @@ -37,6 +37,9 @@ class PoolingGradCPUKernel : public LiteKernel { int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); + + private: }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc index 65607c1faf..f49d64c1c0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.cc @@ -19,6 +19,7 @@ #include "src/kernel_registry.h" #include "include/errorcode.h" #include "nnacl/fp32/arithmetic.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; @@ -26,11 +27,21 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_PowerGrad; namespace mindspore::kernel { -int PowerGradCPUKernel::Init() { return RET_OK; } +int PowerGradCPUKernel::Init() { + if (2 != in_tensors_.size()) { + MS_LOG(ERROR) << "Power Grad Filter should have 2 inputs"; + return RET_ERROR; + } + if (1 != out_tensors_.size()) { + MS_LOG(ERROR) << "Power Grad Filter should have one output"; + return RET_ERROR; + } + return RET_OK; +} int PowerGradCPUKernel::ReSize() { return RET_OK; } -int PowerGradCPUKernel::Run() { +int PowerGradCPUKernel::Execute(int task_id) { auto dy_addr = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto x_addr = reinterpret_cast(in_tensors_.at(1)->MutableData()); auto dx_addr = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -47,6 +58,30 @@ int PowerGradCPUKernel::Run() { return RET_OK; } +int PowerGradRun(void *cdata, int task_id) { + auto power_kernel = reinterpret_cast(cdata); + auto error_code = power_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "power grad error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int PowerGradCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "PowerGradCPUKernel Prepare failed."; + return RET_ERROR; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, PowerGradRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "power grad function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuPowerGradFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, const kernel::KernelKey &desc, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h 
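// Note: Run() zero-fills dx before launching because both pooling gradients
// accumulate: windows overlap whenever stride < kernel, so one input cell can
// receive contributions from several output cells. A 1-D average-pooling
// reference (assumes dx holds (out_len - 1) * stride + k zeroed floats,
// mirroring the fill loop above; function name illustrative):
void AvgPool1dGrad(const float *dy, float *dx, int out_len, int k, int stride) {
  // Each dy[o] spreads dy[o] / k uniformly over its k-wide input window.
  for (int o = 0; o < out_len; ++o)
    for (int j = 0; j < k; ++j)
      dx[o * stride + j] += dy[o] / k;
}
// MaxPoolingGrad instead routes each dy element only to the window position
// that produced the forward maximum, which is why it also takes the saved
// forward tensors.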
b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h index 6c1645d31d..8b1702c53a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/power_grad.h @@ -38,6 +38,7 @@ class PowerGradCPUKernel : public LiteKernel { int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: float power_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.cc new file mode 100644 index 0000000000..d3af29dc56 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.cc @@ -0,0 +1,121 @@ + +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/runtime/kernel/arm/fp32_grad/sgd.h" +#include "schema/model_generated.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "src/runtime/runtime_api.h" +#include "src/runtime/kernel/arm/fp32/nchw2nhwc.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Sgd; + +namespace mindspore::kernel { + +int SgdCPUKernel::ReSize() { return RET_OK; } + +int SgdCPUKernel::Execute(int task_id) { + auto weight = reinterpret_cast(in_tensors_[0]->MutableData()); + auto accumulate = reinterpret_cast(in_tensors_[3]->MutableData()); + float learning_rate = reinterpret_cast(in_tensors_[2]->MutableData())[0]; + auto gradient = reinterpret_cast(in_tensors_[1]->MutableData()); + float moment = reinterpret_cast(in_tensors_[4]->MutableData())[0]; + size_t elem_num = in_tensors_[0]->ElementsNum(); + + if (sgd_param_->use_nesterov_) { + for (size_t i = 0; i < elem_num; ++i) { + accumulate[i] = accumulate[i] * moment + gradient[i]; + weight[i] -= (accumulate[i] * moment + gradient[i]) * learning_rate; + } + } else { + for (size_t i = 0; i < elem_num; ++i) { + accumulate[i] = accumulate[i] * moment + gradient[i] * (1.f - sgd_param_->dampening_); + weight[i] -= accumulate[i] * learning_rate; + } + } + return RET_OK; +} + +int SgdRun(void *cdata, int task_id) { + auto Sgd_kernel = reinterpret_cast(cdata); + auto error_code = Sgd_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SGD run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int SgdCPUKernel::Run() { + auto prepare_ret = Prepare(); + if (prepare_ret != RET_OK) { + MS_LOG(ERROR) << "SgdCPUKernel Prepare fail!ret: " << prepare_ret; + return prepare_ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, SgdRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SGD function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int SgdCPUKernel::Init() { + // Only for test with uninitialized Data + size_t elem_num = in_tensors_[0]->ElementsNum(); + 
auto accumulate = reinterpret_cast(in_tensors_[3]->MutableData()); + for (size_t i = 0; i < elem_num; i++) accumulate[i] = 0.0; + + if (sgd_param_->dampening_ < 0.0f) { + MS_LOG(ERROR) << "dampening should be at least 0.0"; + return RET_ERROR; + } + + if (sgd_param_->use_nesterov_ && sgd_param_->dampening_ > 0.0f) { + MS_LOG(ERROR) << "If use nesterov, dampening must equal to 0.0"; + return RET_ERROR; + } + + return RET_OK; +} + +kernel::LiteKernel *CpuSgdFp32KernelCreator(const std::vector &inputs, + const std::vector &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, + const lite::PrimitiveC *primitive) { + MS_ASSERT(desc.type == schema::PrimitiveType_Sgd); + auto *kernel = new (std::nothrow) SgdCPUKernel(opParameter, inputs, outputs, ctx, primitive); + MS_ASSERT(kernel != nullptr); + + auto ret = kernel->Init(); + if (0 != ret) { + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " + << schema::EnumNamePrimitiveType(static_cast(opParameter->type_)); + delete kernel; + return nullptr; + } + + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Sgd, CpuSgdFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.h new file mode 100644 index 0000000000..355d0ed1e2 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sgd.h @@ -0,0 +1,44 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
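// Note: SgdCPUKernel::Execute implements momentum SGD with dampening, and
// Init() rejects dampening > 0 when Nesterov is enabled (the combination is
// ill-defined, matching the usual framework convention). The rule as a
// standalone sketch (v: accumulator, d: dampening; name illustrative):
#include <cstddef>
// v <- m*v + (1 - d)*g ;  w <- w - lr * (nesterov ? m*v + g : v)
void SgdStep(float *w, float *v, const float *g, size_t n, float lr, float m,
             float d, bool nesterov) {
  for (size_t i = 0; i < n; ++i) {
    v[i] = v[i] * m + g[i] * (1.f - d);
    w[i] -= (nesterov ? v[i] * m + g[i] : v[i]) * lr;
  }
}
// The accumulator zero-fill in Init() is flagged by the patch itself as
// test-only scaffolding for uninitialized input data.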
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SGD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SGD_H_ + +#include +#include "src/lite_kernel.h" +#include "nnacl/fp32_grad/optimizer.h" + +namespace mindspore::kernel { +class SgdCPUKernel : public LiteKernel { + public: + explicit SgdCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive), sgd_param_(nullptr) { + sgd_param_ = reinterpret_cast(parameter); + } + ~SgdCPUKernel() override {} + int Init() override; + int ReSize() override; + int Run() override; + int Execute(int task_id); + + private: + SgdParameter *sgd_param_; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_SGD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc index 82151f60ac..494b322f60 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32/softmax.h" #include "src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; @@ -56,13 +57,8 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab } output2[0] = total_loss / param_->batch_size_; } -int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { - auto ret = Prepare(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Prepare failed."; - return ret; - } +int SoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { auto ins = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto labels = reinterpret_cast(in_tensors_.at(1)->MutableData()); float *out = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -75,6 +71,8 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { MS_ASSERT(out != nullptr); MS_ASSERT(labels != nullptr); MS_ASSERT(ins != nullptr); + float *losses_ = static_cast(GetWorkspace()); + float *sum_data_ = losses_ + data_size; std::fill(losses_, losses_ + data_size, 0); std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0); Softmax(ins, losses_, sum_data_, &sm_params_); @@ -82,6 +80,31 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { return RET_OK; } +int SoftmaxCrossEntropyWithLogitsRun(void *cdata, int task_id) { + auto softmax_kernel = reinterpret_cast(cdata); + auto error_code = softmax_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SoftmaxCrossEntropy error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int SoftmaxCrossEntropyWithLogitsCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "SoftmaxCrossEntropyWithLogitsCPUKernel Prepare failed."; + return ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, SoftmaxCrossEntropyWithLogitsRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SoftmaxCrossEntropy function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() { auto dims = in_tensors_[0]->shape(); param_->n_dim_ 
= 2; @@ -99,18 +122,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Init() { } size_t data_size = in_tensors_.at(0)->ElementsNum(); - losses_ = new (std::nothrow) float[data_size]; - if (losses_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc losses!"; - return RET_ERROR; - } - - sum_data_ = new (std::nothrow) float[dims[0]]; - if (sum_data_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc sum_data_!"; - return RET_ERROR; - } - + SetWorkspaceSize((data_size + dims[0]) * sizeof(float)); sm_params_.n_dim_ = 2; sm_params_.element_size_ = data_size; sm_params_.axis_ = 1; @@ -138,5 +150,4 @@ kernel::LiteKernel *CpuSoftmaxCrossEntropyFp32KernelCreator(const std::vector
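// Note: the former losses_/sum_data_ heap buffers become one workspace of
// (data_size + batch) floats: data_size floats for the softmax probabilities
// plus one normalizer per batch row, carved out at the top of Execute(). The
// reduction in ForwardPostExecute (only partially visible in the hunk) then
// matches the standard dense cross-entropy; a reference under that assumption:
#include <cmath>
// Mean cross-entropy over the batch from dense labels and softmax probs.
float CrossEntropyRef(const float *labels, const float *probs, int batch,
                      int classes) {
  float total = 0.f;
  for (int i = 0; i < batch; ++i)
    for (int c = 0; c < classes; ++c)
      total -= labels[i * classes + c] * std::log(probs[i * classes + c] + 1e-12f);
  return total / batch;  // mirrors total_loss / param_->batch_size_
}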
  • &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) { + : LossKernel(parameter, inputs, outputs, ctx, primitive) { param_ = reinterpret_cast(parameter); } - ~SoftmaxCrossEntropyWithLogitsCPUKernel() override { - if (losses_) delete[] losses_; - if (sum_data_) delete[] sum_data_; - } + ~SoftmaxCrossEntropyWithLogitsCPUKernel() override {} void ForwardPostExecute(const float *labels, const float *logits, float *output1, float *output2) const; - // void ForwardPostExecute(const int *labels, const float *losses, float *output) const; - // void GradPostExecute(const int *labels, const float *losses, float* grads, float *output) const; int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: SoftmaxCrossEntropyParameter *param_; SoftmaxParameter sm_params_; - float *losses_ = nullptr; - float *sum_data_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc index cc6732ff49..209ac702c7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32_grad/softmax_grad.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" +#include "src/runtime/runtime_api.h" #include "include/errorcode.h" using mindspore::lite::KernelRegistrar; @@ -46,33 +47,49 @@ int SoftmaxGradCPUKernel::Init() { axis = param->axis_ = (in_dims - 1); } - int inner_size = 1; + inner_size_ = 1; for (size_t i = axis + 1; i < in_dims; i++) { - inner_size *= in_shape[i]; + inner_size_ *= in_shape[i]; } - - sum_data_ = new (std::nothrow) float[inner_size]; - if (sum_data_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc sum_data_!"; - return RET_ERROR; - } - - sum_mul_ = new (std::nothrow) float[inner_size * in_shape[axis]]; - if (sum_mul_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc sum_mul_!"; - return RET_ERROR; - } - + SetWorkspaceSize(inner_size_ * (1 + in_shape[axis]) * sizeof(float)); return RET_OK; } int SoftmaxGradCPUKernel::ReSize() { return RET_OK; } -int SoftmaxGradCPUKernel::Run() { +int SoftmaxGradCPUKernel::Execute(int task_id) { auto input_ptr = reinterpret_cast(in_tensors_.at(kInputIndex)->MutableData()); auto yt_ptr = reinterpret_cast(in_tensors_.at(1)->MutableData()); auto output_ptr = reinterpret_cast(out_tensors_.at(kOutputIndex)->MutableData()); + float *sum_data_ = static_cast(GetWorkspace()); + float *sum_mul_ = sum_data_ + inner_size_; SoftmaxGrad(input_ptr, yt_ptr, output_ptr, sum_data_, sum_mul_, reinterpret_cast(op_parameter_)); + + return RET_OK; +} + +int SoftmaxGradRun(void *cdata, int task_id) { + auto softmax_kernel = reinterpret_cast(cdata); + auto error_code = softmax_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "softmax_kernel SoftmaxGradRun task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int SoftmaxGradCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "SoftmaxGradCPUKernel Prepare failed."; + return ret; + } + + int error_code = ParallelLaunch(this->context_->thread_pool_, SoftmaxGradRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SoftmaxGradRun function error error_code[" << error_code << "]"; + return 
RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h index fda77469a3..f654d6a46f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h @@ -27,21 +27,18 @@ class SoftmaxGradCPUKernel : public LiteKernel { explicit SoftmaxGradCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, const lite::PrimitiveC *primitive) - : LiteKernel(parameter, inputs, outputs, ctx, primitive), sum_data_(nullptr), sum_mul_(nullptr) { + : LiteKernel(parameter, inputs, outputs, ctx, primitive) { param = reinterpret_cast(parameter); } - ~SoftmaxGradCPUKernel() override { - if (sum_data_) delete[] sum_data_; - if (sum_mul_) delete[] sum_mul_; - } + ~SoftmaxGradCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: SoftmaxParameter *param; - float *sum_data_ = nullptr; - float *sum_mul_ = nullptr; + size_t inner_size_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc index e10edbf6e1..77397fa1b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc @@ -20,6 +20,7 @@ #include "nnacl/fp32/softmax.h" #include "src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; @@ -80,13 +81,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::GradPostExecute(const int *lab return RET_OK; } -int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() { - auto ret = Prepare(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Prepare failed."; - return ret; - } - +int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { auto ins = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto labels = reinterpret_cast(in_tensors_.at(1)->MutableData()); float *out = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -98,8 +93,11 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() { MS_ASSERT(out != nullptr); MS_ASSERT(labels != nullptr); MS_ASSERT(ins != nullptr); - std::fill(losses_, losses_ + data_size, 0); - std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0); + + float *losses_ = static_cast(GetWorkspace()); + float *sum_data_ = losses_ + data_size; + std::fill(losses_, losses_ + data_size, 0.f); + std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f); Softmax(ins, losses_, sum_data_, &sm_params_); if (is_train()) { GradPostExecute(labels, losses_, grads, out); @@ -109,6 +107,30 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() { return RET_OK; } +int SparseSoftmaxCrossEntropyRun(void *cdata, int task_id) { + auto sparse_kernel = reinterpret_cast(cdata); + auto error_code = sparse_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SparseSoftmaxCrossEntropyRun error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Run() { + 
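// Note: SoftmaxGradCPUKernel::Init now caches inner_size_ and reserves
// inner_size_ * (1 + dim) workspace floats: sum_data_ holds one dot product
// per inner position and sum_mul_ a dim-wide broadcast scratch. The quantity
// computed is the softmax Jacobian-vector product; a single-slice reference:
void SoftmaxGradRef(const float *y, const float *dy, float *dx, int n) {
  // dx_i = y_i * (dy_i - sum_j y_j * dy_j), y being the forward softmax.
  float dot = 0.f;
  for (int i = 0; i < n; ++i) dot += y[i] * dy[i];
  for (int i = 0; i < n; ++i) dx[i] = y[i] * (dy[i] - dot);
}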
auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "SparseSoftmaxCrossEntropyWithLogitsCPUKernel Prepare failed."; + return ret; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, SparseSoftmaxCrossEntropyRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "SparseSoftmaxCrossEntropy function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { auto dims = in_tensors_[0]->shape(); param->n_dim_ = 2; @@ -125,18 +147,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Init() { return RET_ERROR; } size_t data_size = in_tensors_.at(0)->ElementsNum(); - losses_ = new (std::nothrow) float[data_size]; - if (losses_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc losses!"; - return RET_ERROR; - } - - sum_data_ = new (std::nothrow) float[dims[0]]; - if (sum_data_ == nullptr) { - MS_LOG(ERROR) << "failed to malloc sum_data_!"; - return RET_ERROR; - } - + SetWorkspaceSize((data_size + dims[0]) * sizeof(float)); sm_params_.n_dim_ = 2; sm_params_.element_size_ = data_size; sm_params_.axis_ = 1; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h index e876ef7377..13e2d0f21c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.h @@ -32,13 +32,10 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel { const std::vector &outputs, const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) - : LossKernel(parameter, inputs, outputs, ctx, primitive), losses_(nullptr), sum_data_(nullptr) { + : LossKernel(parameter, inputs, outputs, ctx, primitive) { param = reinterpret_cast(parameter); } - ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override { - if (losses_) delete[] losses_; - if (sum_data_) delete[] sum_data_; - } + ~SparseSoftmaxCrossEntropyWithLogitsCPUKernel() override {} int ForwardPostExecute(const int *labels, const float *losses, float *output) const; int GradPostExecute(const int *labels, const float *losses, float *grads, float *output) const; @@ -46,12 +43,11 @@ class SparseSoftmaxCrossEntropyWithLogitsCPUKernel : public LossKernel { int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: SoftmaxCrossEntropyParameter *param; SoftmaxParameter sm_params_; - float *losses_ = nullptr; - float *sum_data_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc index a47a8a6ee3..e1523c8694 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.cc @@ -19,6 +19,7 @@ #include "schema/model_generated.h" #include "src/kernel_registry.h" #include "include/errorcode.h" +#include "src/runtime/runtime_api.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::KernelRegistrar; @@ -28,16 +29,21 @@ using mindspore::schema::PrimitiveType_TupleGetItem; namespace mindspore::kernel { -int TupleGetItemCPUKernel::Init() { return RET_OK; } - -int TupleGetItemCPUKernel::ReSize() { return 0; } - -int TupleGetItemCPUKernel::Run() { - auto ret = Prepare(); - if (ret != RET_OK) { - 
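// Note: the sparse variant takes integer class indices instead of one-hot
// rows. With the softmax probabilities already in the workspace, the loss
// and gradient reduce to the classic closed forms; GradPostExecute's body is
// not shown in the hunk, so the gradient below is the assumed standard
// formula, not a transcription of the patch:
#include <cmath>
// loss = -mean_i log(probs[i][label_i]);  grads = (probs - onehot) / batch.
void SparseCERef(const int *labels, const float *probs, float *grads,
                 float *loss, int batch, int classes) {
  float total = 0.f;
  for (int i = 0; i < batch; ++i) {
    total -= std::log(probs[i * classes + labels[i]] + 1e-12f);
    for (int c = 0; c < classes; ++c)
      grads[i * classes + c] =
          (probs[i * classes + c] - (c == labels[i] ? 1.f : 0.f)) / batch;
  }
  *loss = total / batch;
}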
MS_LOG(ERROR) << "Prepare failed."; +int TupleGetItemCPUKernel::Init() { + if (1 != in_tensors_.size()) { + MS_LOG(ERROR) << "Tuple Grad Filter should have one input"; + return RET_ERROR; + } + if (1 != out_tensors_.size()) { + MS_LOG(ERROR) << "Tuple Grad Filter should have one output"; return RET_ERROR; } + return RET_OK; +} + +int TupleGetItemCPUKernel::ReSize() { return RET_OK; } + +int TupleGetItemCPUKernel::Execute(int task_id) { auto in = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto out = reinterpret_cast(out_tensors_.at(0)->MutableData()); @@ -46,6 +52,30 @@ int TupleGetItemCPUKernel::Run() { return RET_OK; } +int TupleRun(void *cdata, int task_id) { + auto tuple_kernel = reinterpret_cast(cdata); + auto error_code = tuple_kernel->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "tuple grad error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int TupleGetItemCPUKernel::Run() { + auto ret = Prepare(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "TupleGetItemCPUKernel Prepare failed."; + return RET_ERROR; + } + int error_code = ParallelLaunch(this->context_->thread_pool_, TupleRun, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "tuple function error error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + kernel::LiteKernel *CpuTupleGetItemFp32KernelCreator(const std::vector &inputs, const std::vector &outputs, OpParameter *opParameter, const lite::InnerContext *ctx, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h index 3881f7e5ff..b23533bd24 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/tuple_getitem.h @@ -35,6 +35,7 @@ class TupleGetItemCPUKernel : public LiteKernel { int Init() override; int ReSize() override; int Run() override; + int Execute(int task_id); private: OpParameter *param; diff --git a/mindspore/lite/src/train/train_populate_parameter.cc b/mindspore/lite/src/train/train_populate_parameter.cc index 2ebe49d9a0..b2f8e318f3 100644 --- a/mindspore/lite/src/train/train_populate_parameter.cc +++ b/mindspore/lite/src/train/train_populate_parameter.cc @@ -29,6 +29,11 @@ #include "nnacl/power_parameter.h" #include "src/ops/bias_grad.h" #include "nnacl/arithmetic_common.h" +#include "nnacl/fp32_grad/optimizer.h" +#include "src/ops/apply_momentum.h" +#include "src/ops/sgd.h" +#include "src/ops/bn_grad.h" +#include "nnacl/fp32_grad/batch_norm.h" namespace mindspore::kernel { @@ -48,6 +53,49 @@ OpParameter *DefaultPopulateParameter(const mindspore::lite::PrimitiveC *primiti return param; } +OpParameter *PopulateApplyMomentumParameter(const mindspore::lite::PrimitiveC *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op."; + return nullptr; + } + ApplyMomentumParameter *p = reinterpret_cast(malloc(sizeof(ApplyMomentumParameter))); + if (p == nullptr) { + MS_LOG(ERROR) << "new ApplyMomentumParameter failed."; + return nullptr; + } + p->op_parameter_.type_ = primitive->Type(); + + auto apply_momentum_primitive = + reinterpret_cast(const_cast(primitive)); + + p->grad_scale_ = apply_momentum_primitive->GetGradientScale(); + p->use_locking_ = apply_momentum_primitive->GetUseLocking(); + p->use_nesterov_ = apply_momentum_primitive->GetUseNesterov(); + + return reinterpret_cast(p); +} + +OpParameter *PopulateSgdParameter(const 
+OpParameter *PopulateSgdParameter(const mindspore::lite::PrimitiveC *primitive) {
+  if (primitive == nullptr) {
+    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
+    return nullptr;
+  }
+  SgdParameter *p = reinterpret_cast<SgdParameter *>(malloc(sizeof(SgdParameter)));
+  if (p == nullptr) {
+    MS_LOG(ERROR) << "malloc SgdParameter failed.";
+    return nullptr;
+  }
+  p->op_parameter_.type_ = primitive->Type();
+
+  auto sgd_primitive = reinterpret_cast<mindspore::lite::Sgd *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
+
+  p->weight_decay_ = sgd_primitive->GetWeightDecay();
+  p->dampening_ = sgd_primitive->GetDampening();
+  p->use_nesterov_ = sgd_primitive->GetUseNesterov();
+
+  return reinterpret_cast<OpParameter *>(p);
+}
+
 OpParameter *PopulateSoftmaxCrossEntropyParameter(const mindspore::lite::PrimitiveC *primitive) {
   if (primitive == nullptr) {
     MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
@@ -250,9 +298,27 @@ OpParameter *PopulateBiasGradParameter(const mindspore::lite::PrimitiveC *primit
   return reinterpret_cast<OpParameter *>(arithmetic_param);
 }
 
+OpParameter *PopulateBNGradParameter(const mindspore::lite::PrimitiveC *primitive) {
+  if (primitive == nullptr) {
+    MS_LOG(ERROR) << "Primitive is nullptr when populating parameter for op.";
+    return nullptr;
+  }
+
+  BNGradParameter *bnGrad_param = reinterpret_cast<BNGradParameter *>(malloc(sizeof(BNGradParameter)));
+  if (bnGrad_param == nullptr) {
+    MS_LOG(ERROR) << "malloc BNGradParameter failed.";
+    return nullptr;
+  }
+  bnGrad_param->op_parameter_.type_ = primitive->Type();
+  auto bngrad = reinterpret_cast<mindspore::lite::BNGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive));
+  bnGrad_param->epsilon_ = bngrad->GetEps();
+  bnGrad_param->momentum_ = 0.1;
+  return reinterpret_cast<OpParameter *>(bnGrad_param);
+}
+
 void PopulateTrainParameters() {
   auto ppr = PopulateParameterRegistry::GetInstance();
-  ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, DefaultPopulateParameter);
+  ppr->AddPopulateParameterFunc(schema::PrimitiveType_ApplyMomentum, PopulateApplyMomentumParameter);
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter);
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_SoftmaxCrossEntropy, PopulateSoftmaxCrossEntropyParameter);
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_ActivationGrad, PopulateActivationGradParameter);
@@ -263,6 +329,8 @@ void PopulateTrainParameters() {
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_Conv2DGradInput, PopulateConvolutionGradInputParameter);
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_PoolingGrad, PopulatePoolingGradParameter);
   ppr->AddPopulateParameterFunc(schema::PrimitiveType_PowerGrad, PopulatePowerGradParameter);
+  ppr->AddPopulateParameterFunc(schema::PrimitiveType_Sgd, PopulateSgdParameter);
+  ppr->AddPopulateParameterFunc(schema::PrimitiveType_BNGrad, PopulateBNGradParameter);
 }
 
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc
index 10e0311307..5486d2f221 100644
--- a/mindspore/lite/src/train/train_session.cc
+++ b/mindspore/lite/src/train/train_session.cc
@@ -14,12 +14,12 @@
  * limitations under the License.
*/ -#include "include/train_session.h" +#include "src/train/train_session.h" #include -#include "src/common/log_adapter.h" -#include "include/context.h" -#include "include/train_model.h" +#include +#include #include "include/errorcode.h" +#include "include/train_model.h" #include "src/common/utils.h" #include "src/tensor.h" #include "src/train/loss_kernel.h" @@ -29,7 +29,8 @@ #include "src/kernel_registry.h" #include "src/runtime/kernel/arm/fp32_grad/convolution.h" -namespace mindspore::session { +namespace mindspore { +namespace lite { static size_t TSFindTensor(const std::vector &where, const lite::Tensor *searchParameter) { for (size_t i = 0; i < where.size(); i++) { @@ -42,45 +43,72 @@ static size_t TSFindTensor(const std::vector &where, const lite: TrainSession::TrainSession() { kernel::PopulateTrainParameters(); } -void TrainSession::ReplaceOps() { - mindspore::lite::KernelRegistrar tmp(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, - mindspore::schema::PrimitiveType_Conv2D, - mindspore::kernel::CpuConvTrainFp32KernelCreator); +std::vector TrainSession::ReplaceOps() { + const std::vector replace = { + {{mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, mindspore::schema::PrimitiveType_Conv2D}, + mindspore::kernel::CpuConvTrainFp32KernelCreator}, + {{mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, mindspore::schema::PrimitiveType_DepthwiseConv2D}, + mindspore::kernel::CpuConvTrainFp32KernelCreator}}; + mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance(); + std::vector results; + for (auto v : replace) { + const CreatorOp cl = make_tuple(std::get<0>(v), reg->GetCreator(std::get<0>(v))); + results.push_back(cl); + reg->RegKernel(std::get<0>(v), std::get<1>(v)); + } + return results; +} - mindspore::lite::KernelRegistrar tmp0(mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, - mindspore::schema::PrimitiveType_DepthwiseConv2D, - mindspore::kernel::CpuConvTrainFp32KernelCreator); +void TrainSession::RestoreOps(const std::vector &restore) { + mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance(); + for (auto v : restore) { + reg->RegKernel(std::get<0>(v), std::get<1>(v)); + } } -int TrainSession::CompileGraph(lite::Model *model) { - model_ = reinterpret_cast(model); - if (model_ == nullptr) { - MS_LOG(ERROR) << "TrainSession can only compile TrainModels"; - return lite::RET_ERROR; +void TrainSession::AllocWorkSpace() { + size_t workspace_size = 0; + for (auto k : kernels_) { + if (workspace_size < k->GetWorkspaceSize()) { + workspace_size = k->GetWorkspaceSize(); + } } + mindspore::kernel::LiteKernel::AllocWorkspace(workspace_size); +} + +int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; } - ReplaceOps(); - auto ret = LiteSession::CompileGraph(model); +int TrainSession::CompileTrainGraph(mindspore::lite::TrainModel *model) { + model_ = model; + + auto restore = ReplaceOps(); + auto ret = lite::LiteSession::CompileGraph(model); orig_output_map_ = output_node_map_; orig_output_tensor_map_ = output_tensor_map_; + for (auto inTensor : inputs_) inTensor->MutableData(); + RestoreOps(restore); + AllocWorkSpace(); return ret; } -TrainSession::~TrainSession() { delete model_; } +TrainSession::~TrainSession() { + mindspore::kernel::LiteKernel::FreeWorkspace(); + delete model_; +} void *TrainSession::ExportToBuf(char *buf, size_t *len) const { return model_->ExportBuf(buf, len); } int TrainSession::RunGraph(const session::KernelCallBack &before, const 
session::KernelCallBack &after) {
   this->outputs_.clear();
   for (auto ms_tensors : output_node_map_)
-    for (auto ms_tensor : ms_tensors.second) this->outputs_.push_back((reinterpret_cast<lite::Tensor *>(ms_tensor)));
-  if (train_mode_) return LiteSession::RunGraph(before, after);
+    for (auto ms_tensor : ms_tensors.second) this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor)));
+  if (train_mode_) return lite::LiteSession::RunGraph(before, after);
 
   // object is expected to run only inference part of graph
   // prepare a list of kernels till the loss function -- temporary solution
   std::vector<kernel::LiteKernel *> inference_kernels;
   for (auto kernel : this->kernels_) {
-    if (reinterpret_cast<kernel::LossKernel *>(kernel) != nullptr) break;
+    if (IsLossKernel(kernel)) break;
     inference_kernels.push_back(kernel);
   }
@@ -106,9 +134,10 @@ void TrainSession::Train() {
   output_tensor_map_.clear();
   train_mode_ = true;
   for (auto kernel : this->kernels_) {
-    if (reinterpret_cast<kernel::LossKernel *>(kernel) != nullptr) {
+    if (IsLossKernel(kernel)) {
       auto *ms_tensor = kernel->out_tensors().at(0);
       if (ms_tensor != nullptr) {
+        ms_tensor->MutableData();
         output_node_map_[kernel->name()].emplace_back(ms_tensor);
         auto index = TSFindTensor(tensors_, ms_tensor);
         if (index != tensors_.size()) {
@@ -124,26 +153,43 @@ void TrainSession::Eval() {
     MS_ASSERT(nullptr != kernel);
     kernel->eval();
   }
-  kernel::LiteKernel *last_kernel = nullptr;
   output_node_map_ = orig_output_map_;
   output_tensor_map_ = orig_output_tensor_map_;
   train_mode_ = false;
   for (auto kernel : this->kernels_) {
-    if ((reinterpret_cast<kernel::LossKernel *>(kernel) != nullptr) && (last_kernel != nullptr)) {
-      if (output_node_map_.find(last_kernel->name()) == output_node_map_.end()) {
-        auto *ms_tensor = last_kernel->out_tensors().at(0);
-        if (ms_tensor != nullptr) {
-          output_node_map_[last_kernel->name()].emplace_back(ms_tensor);
-          auto index = TSFindTensor(tensors_, ms_tensor);
-          if (index != tensors_.size()) {
-            output_tensor_map_.insert(std::make_pair(std::to_string(index), ms_tensor));
+    if (IsLossKernel(kernel)) {
+      for (auto in_kernel : kernel->in_kernels()) {
+        if (output_node_map_.find(in_kernel->name()) == output_node_map_.end()) {
+          auto *ms_tensor = in_kernel->out_tensors().at(0);
+          if (ms_tensor != nullptr) {
+            output_node_map_[in_kernel->name()].emplace_back(ms_tensor);
+            auto index = TSFindTensor(tensors_, ms_tensor);
+            if (index != tensors_.size()) {
+              output_tensor_map_.insert(std::make_pair(std::to_string(index), ms_tensor));
+            }
           }
         }
       }
     }
-    last_kernel = kernel;
   }
 }
 
-}  // namespace mindspore::session
+bool TrainSession::IsLossKernel(kernel::LiteKernel *kernel) {
+  return (kernel->Type() == schema::PrimitiveType_SoftmaxCrossEntropy);
+}
+
+}  // namespace lite
+
+session::TrainSession *session::TrainSession::CreateSession(lite::Context *context) {
+  auto session = new lite::TrainSession();
+  auto ret = session->Init(context);
+  if (ret != mindspore::lite::RET_OK) {
+    MS_LOG(ERROR) << "init session failed";
+    delete session;
+    return nullptr;
+  }
+  return session;
+}
+
+}  // namespace mindspore
diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h
new file mode 100644
index 0000000000..226497aa71
--- /dev/null
+++ b/mindspore/lite/src/train/train_session.h
@@ -0,0 +1,94 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_TRAIN_TRAIN_SESSION_H_ +#define MINDSPORE_LITE_SRC_TRAIN_TRAIN_SESSION_H_ +#include +#include +#include +#include +#include "src/ops/primitive_c.h" +#include "include/train_session.h" +#include "include/train_model.h" +#include "src/lite_session.h" + +/* + Inheritance Diagram + + +-------------------------------+ + | session::LiteSession | + +--------+------------+---------+ + / \ + +-----------------+-----+ +-------+------------+ + | session::TrainSession | | lite::LiteSession | + +-----------------+-----+ +-------+------------+ + \ / + +--------+------------+---------+ + | lite::TrainSession | + +-------------------------------+ +*/ + +namespace mindspore { +namespace lite { + +using CreatorOp = std::tuple; +class TrainSession : virtual public session::TrainSession, virtual public lite::LiteSession { + public: + TrainSession(); + ~TrainSession(); + + int RunGraph(const session::KernelCallBack &before = nullptr, + const session::KernelCallBack &after = nullptr) override; + + int CompileGraph(lite::Model *model) override; + int CompileTrainGraph(lite::TrainModel *model) override; + + void *ExportToBuf(char *buf, size_t *len) const override; + + void Train() override; + void Eval() override; + + void BindThread(bool if_bind) override { return lite::LiteSession::BindThread(if_bind); } + std::vector GetInputs() const override { return lite::LiteSession::GetInputs(); } + mindspore::tensor::MSTensor *GetInputsByTensorName(const std::string &tensor_name) const override { + return lite::LiteSession::GetInputsByTensorName(tensor_name); + } + std::vector GetOutputsByNodeName(const std::string &node_name) const override { + return lite::LiteSession::GetOutputsByNodeName(node_name); + } + std::unordered_map GetOutputs() const override { + return lite::LiteSession::GetOutputs(); + } + + std::vector GetOutputTensorNames() const override { return lite::LiteSession::GetOutputTensorNames(); } + mindspore::tensor::MSTensor *GetOutputByTensorName(const std::string &tensor_name) const override { + return lite::LiteSession::GetOutputByTensorName(tensor_name); + } + int Resize(const std::vector &inputs, const std::vector> &dims) override { + return lite::LiteSession::Resize(inputs, dims); + } + + protected: + void AllocWorkSpace(); + virtual std::vector ReplaceOps(); + virtual void RestoreOps(const std::vector &restore); + bool IsLossKernel(kernel::LiteKernel *kernel); + TrainModel *model_ = nullptr; + std::unordered_map> orig_output_map_; + std::unordered_map orig_output_tensor_map_; +}; +} // namespace lite +} // namespace mindspore +#endif // MINDSPORE_LITE_SRC_TRAIN_TRAIN_SESSION_H_ diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc index 5a2fced4bb..9f50ea2106 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc @@ -112,9 +112,13 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { 
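With the factory defined above and the public interface kept on session::TrainSession, a driver for this API follows the CompileTrainGraph / Train / RunGraph / Eval sequence. A hedged sketch; lite::TrainModel::Import and the batch-loading step are assumptions and not part of this patch:

```cpp
#include "include/context.h"
#include "include/train_session.h"
#include "include/train_model.h"

// Hypothetical driver: model_buf/model_size hold a .ms training model.
void TrainLoop(char *model_buf, size_t model_size, int num_steps) {
  mindspore::lite::Context context;
  auto *session = mindspore::session::TrainSession::CreateSession(&context);
  auto *model = mindspore::lite::TrainModel::Import(model_buf, model_size);  // assumed API
  session->CompileTrainGraph(model);  // swaps in train kernels, compiles, sizes the shared workspace

  session->Train();                   // graph outputs become the loss kernel outputs
  for (int i = 0; i < num_steps; ++i) {
    // ... copy a batch into session->GetInputs() ...
    session->RunGraph();              // forward + backward + optimizer update
  }
  session->Eval();                    // outputs rewired to the kernels feeding the loss
  session->RunGraph();                // inference only: stops at the first loss kernel
  delete session;                     // the session owns (and deletes) the model
}
```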
std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -146,9 +150,13 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -182,9 +190,13 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -219,9 +231,13 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -256,9 +272,13 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), 
&ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -291,9 +311,13 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); int loop_count = 1000; auto time_start = mindspore::lite::GetTimeUs(); @@ -336,9 +360,13 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -372,9 +400,13 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -408,9 +440,13 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -444,9 +480,13 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - 
auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -480,9 +520,13 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -517,9 +561,13 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -553,9 +601,13 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc index 71c01b7dc4..67e67ca1db 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc @@ -45,10 +45,13 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) { dw_tensor.SetData(output_data); std::vector outputs = {&dw_tensor}; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad}; + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bias_param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bias_param), 
&ctx, desc, nullptr); kernel_obj->Run(); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc index 1242008a71..8726b3c88c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc @@ -58,19 +58,24 @@ TEST_F(TestBNGradFp32, BNGradFp32) { auto var_tensor = CreateInTensor("././test_data/bngrad/save_var_3.bin", {1, 1, 1, channels}); // prepare output tensors lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels}); - dx_tensor.MallocData(); + ASSERT_EQ(dx_tensor.MallocData(), 0); lite::Tensor dscale_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - dscale_tensor.MallocData(); + ASSERT_EQ(dscale_tensor.MallocData(), 0); lite::Tensor dbias_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - dbias_tensor.MallocData(); + ASSERT_EQ(dbias_tensor.MallocData(), 0); std::vector inputs = {dy_tensor, x_tensor, scale_tensor, mean_tensor, var_tensor}; std::vector outputs = {&dx_tensor, &dscale_tensor, &dbias_tensor}; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad}; + lite::InnerContext ctx; + ctx.device_type_ = lite::DT_CPU; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bn_param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bn_param), &ctx, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); for (int i = 0; i < 3; i++) { kernel_obj->Run(); @@ -107,6 +112,7 @@ TEST_F(TestBNGradFp32, BNGradFp32) { v->SetData(nullptr); delete v; } + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel_obj; MS_LOG(INFO) << "BNGradFp32 passed"; } @@ -114,6 +120,7 @@ TEST_F(TestBNGradFp32, BNGradFp32) { TEST_F(TestBNGradFp32, BNTtrainFp32) { auto bn_param = static_cast(malloc(sizeof(BatchNormParameter))); bn_param->epsilon_ = 0.00001; + bn_param->momentum_ = 0.; const int batch = 2; const int channels = 3; const int height = 4; @@ -122,22 +129,22 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) { auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels}); lite::Tensor scale_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - scale_tensor.MallocData(); + ASSERT_EQ(scale_tensor.MallocData(), 0); auto scale = reinterpret_cast(scale_tensor.MutableData()); std::fill(scale, scale + channels, 1.0f); lite::Tensor bias_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - bias_tensor.MallocData(); + ASSERT_EQ(bias_tensor.MallocData(), 0); auto bias = reinterpret_cast(bias_tensor.MutableData()); std::fill(bias, bias + channels, 1.0f); lite::Tensor mean_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - mean_tensor.MallocData(); + ASSERT_EQ(mean_tensor.MallocData(), 0); auto mean = reinterpret_cast(mean_tensor.MutableData()); std::fill(mean, mean + channels, 0.0f); lite::Tensor var_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - var_tensor.MallocData(); + ASSERT_EQ(var_tensor.MallocData(), 0); auto var = reinterpret_cast(var_tensor.MutableData()); std::fill(var, var + channels, 1.0f); @@ -146,11 +153,11 @@ 
TEST_F(TestBNGradFp32, BNTtrainFp32) { lite::Tensor out_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels}); ASSERT_EQ(out_tensor.MallocData(), 0); - lite::Tensor run_mean_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - ASSERT_EQ(run_mean_tensor.MallocData(), 0); + lite::Tensor save_scale_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); + ASSERT_EQ(save_scale_tensor.MallocData(), 0); - lite::Tensor run_var_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); - ASSERT_EQ(run_var_tensor.MallocData(), 0); + lite::Tensor save_bias_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); + ASSERT_EQ(save_bias_tensor.MallocData(), 0); lite::Tensor save_mean_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); ASSERT_EQ(save_mean_tensor.MallocData(), 0); @@ -158,7 +165,7 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) { lite::Tensor save_var_tensor(TypeId::kNumberTypeFloat32, {1, 1, 1, channels}); ASSERT_EQ(save_var_tensor.MallocData(), 0); - std::vector outputs = {&out_tensor, &run_mean_tensor, &run_var_tensor, &save_mean_tensor, + std::vector outputs = {&out_tensor, &save_scale_tensor, &save_bias_tensor, &save_mean_tensor, &save_var_tensor}; kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm}; @@ -170,26 +177,31 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) { auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bn_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); + + float *save_mean = reinterpret_cast(save_mean_tensor.MutableData()); + float *save_var = reinterpret_cast(save_var_tensor.MutableData()); + std::fill(save_mean, save_mean + channels, 0.f); + std::fill(save_var, save_var + channels, 0.f); kernel_obj->train(); kernel_obj->Run(); - float *run_mean = reinterpret_cast(run_mean_tensor.MutableData()); - float *run_var = reinterpret_cast(run_var_tensor.MutableData()); - std::cout << "================run_mean==============================\n"; - for (int i = 0; i < channels; i++) std::cout << run_mean[i] << " "; + std::cout << "================save_mean==============================\n"; + for (int i = 0; i < channels; i++) std::cout << save_mean[i] << " "; std::cout << "\n"; - std::cout << "================run_var==============================\n"; - for (int i = 0; i < channels; i++) std::cout << run_var[i] << " "; + std::cout << "===============save_var==============================\n"; + for (int i = 0; i < channels; i++) std::cout << save_var[i] << " "; std::cout << "\n"; delete[] reinterpret_cast(x_tensor->MutableData()); - auto res = mindspore::lite::CompareRelativeOutput(run_mean, "./test_data/bngrad/running_mean_3.bin"); + auto res = mindspore::lite::CompareRelativeOutput(save_mean, "./test_data/bngrad/running_mean_3.bin"); EXPECT_EQ(res, 0); - res = mindspore::lite::CompareRelativeOutput(run_var, "./test_data/bngrad/running_var_3.bin"); + res = mindspore::lite::CompareRelativeOutput(save_var, "./test_data/bngrad/running_var_3.bin"); EXPECT_EQ(res, 0); x_tensor->SetData(nullptr); delete x_tensor; + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel_obj; } } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc index 61948362d0..006a6ff81f 100644 --- 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc @@ -107,10 +107,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { std::vector inputs = {&dy_tensor, &x_tensor}; std::vector outputs = {&dw_tensor}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); - + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { kernel->Run(); @@ -134,6 +139,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { delete[] input_data; delete[] dy_data; delete[] dw_data; + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; // delete conv_param; dw_tensor.SetData(nullptr); @@ -175,9 +181,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { @@ -203,6 +215,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { w_tensor.SetData(nullptr); dy_tensor.SetData(nullptr); dx_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; // delete conv_param; @@ -241,10 +254,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { std::vector inputs = {&dy_tensor, &x_tensor}; std::vector outputs = {&dw_tensor}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); - + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { kernel->Run(); @@ -270,6 +288,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { dw_tensor.SetData(nullptr); x_tensor.SetData(nullptr); dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; // delete conv_param; MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; @@ -308,10 +327,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; + lite::InnerContext context; + 
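Two pieces of setup now recur in every grad-kernel test in this file: an initialized lite::InnerContext, since Run() dispatches through context_->thread_pool_ and the old NULL context argument would no longer work, and the static workspace sized from GetWorkspaceSize(). A hypothetical helper bundling that boilerplate (RunGradKernel is not in this patch; identifiers otherwise as used in the surrounding tests):

```cpp
// Sketch of the setup/teardown each test repeats, for a kernel registered under `desc`.
void RunGradKernel(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
                   OpParameter *param, const kernel::KernelKey &desc, int warmup = 3) {
  lite::InnerContext ctx;
  ctx.device_type_ = lite::DT_CPU;
  ctx.thread_num_ = 1;                  // grad kernels here launch a single parallel task
  ASSERT_EQ(lite::RET_OK, ctx.Init());  // presumably builds the thread pool ParallelLaunch uses
  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
  auto kernel = creator(inputs, outputs, param, &ctx, desc, nullptr);
  mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize());  // shared static scratch
  for (int i = 0; i < warmup; i++) {
    kernel->Run();
  }
  mindspore::kernel::LiteKernel::FreeWorkspace();  // only after the last Run(); the buffer is shared
  delete kernel;
}
```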
context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); - + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { kernel->Run(); @@ -338,6 +362,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { dy_tensor.SetData(nullptr); delete kernel; + mindspore::kernel::LiteKernel::FreeWorkspace(); // delete conv_param; MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; } @@ -375,9 +400,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { std::vector inputs = {&dy_tensor, &x_tensor}; std::vector outputs = {&dw_tensor}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { @@ -403,6 +434,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { dw_tensor.SetData(nullptr); dy_tensor.SetData(nullptr); x_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; // delete conv_param; MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; @@ -441,14 +473,15 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), NULL, desc, nullptr); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); @@ -469,6 +502,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { dx_tensor.SetData(nullptr); dy_tensor.SetData(nullptr); w_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; // delete conv_param; MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; @@ -515,6 +549,8 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast(conv_param), inputs, outputs, &context, 0); kernel->Init(); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + kernel->train(); EXPECT_EQ(kernel->is_train(), 1); @@ -543,9 
+579,208 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { x_tensor.SetData(nullptr); y_tensor.SetData(nullptr); w_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel; MS_LOG(INFO) << "TestConvolutionFp32 Filter Grad passed"; } +TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 4; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 15; + conv_param->output_w_ = 15; + conv_param->output_channel_ = 12; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 2; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 15, 15, 12}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/conv/convfp32_input0_d2_g2_s2_2_4_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 4}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({12, 3, 3, 2}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_dw_d2_g2_s2_12_2_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << 
"TestConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 4; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 15; + conv_param->output_w_ = 15; + conv_param->output_channel_ = 12; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 2; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 15, 15, 12}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + size_t w_size; + std::string w_path = "./test_data/conv/convfp32_w_d2_g2_s2_12_2_3_3.bin"; + auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); + std::vector dim_w({12, 3, 3, 2}); + lite::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w); + w_tensor.SetData(w_data); + + size_t output_data_size = + conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; + auto dx_data = new float[output_data_size]; + std::vector dim_dx({2, 32, 32, 4}); + lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); + dx_tensor.SetData(dx_data); + + std::vector inputs = {&dy_tensor, &w_tensor}; + std::vector outputs = {&dx_tensor}; + // runtime part + + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/conv/convfp32_inputdx_d2_g2_s2_2_4_32_32.bin"; + auto res = lite::CompareRelativeOutput(dx_data, output_path); + EXPECT_EQ(res, 0); + delete[] dx_data; + delete[] w_data; + delete[] dy_data; + dx_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + w_tensor.SetData(nullptr); + delete kernel; + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestConvolutionGradFp32 Filter Grad passed"; +} + } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc new file mode 100644 index 
0000000000..5ca7276c6f --- /dev/null +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc @@ -0,0 +1,634 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +// #include "utils/log_adapter.h" +#include "common/common_test.h" +#include "src/common/file_utils.h" +#include "src/common/file_utils_ext.h" +#include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h" +#include "mindspore/lite/nnacl/conv_parameter.h" +#include "mindspore/lite/src/kernel_registry.h" + +namespace mindspore { +class TestDeConvolutionGradFp32 : public mindspore::CommonTest { + public: + TestDeConvolutionGradFp32() {} +}; + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 3; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 63; + conv_param->output_w_ = 63; + conv_param->output_channel_ = 9; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 1; + conv_param->dilation_w_ = 1; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 1; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_2_9_63_63.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 63, 63, 9}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_2_3_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 3}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({3, 3, 3, 9}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, 
outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_9_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 3; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 65; + conv_param->output_w_ = 65; + conv_param->output_channel_ = 9; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 1; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_2_9_65_65.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 65, 65, 9}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_2_3_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 3}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({9, 3, 3, 3}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + 
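The hard-coded shapes in these fixtures follow the standard output-size arithmetic: a 3x3 kernel with dilation 2 spans 5 pixels, so a 32x32 input with stride 2 and padding 1 on each side yields 15x15 for convolution, while the transposed cases grow 32 to 63, 65 or 34. A quick self-check of those numbers:

```cpp
#include <cassert>

// Standard formulas; `pad` is the sum over both sides (pad_u_ + pad_d_ = 2 in these tests).
int ConvOut(int in, int k, int stride, int dilation, int pad) {
  int eff_k = dilation * (k - 1) + 1;  // dilated kernel extent
  return (in + pad - eff_k) / stride + 1;
}
int DeconvOut(int in, int k, int stride, int dilation, int pad) {
  return (in - 1) * stride - pad + dilation * (k - 1) + 1;
}

int main() {
  assert(ConvOut(32, 3, 2, 2, 2) == 15);    // conv grad tests: 32x32 -> 15x15
  assert(DeconvOut(32, 3, 2, 1, 2) == 63);  // deconv, dilation 1: 32x32 -> 63x63
  assert(DeconvOut(32, 3, 2, 2, 2) == 65);  // deconv, dilation 2: 32x32 -> 65x65
  assert(DeconvOut(32, 3, 1, 2, 2) == 34);  // deconv, stride 1, dilation 2: 32x32 -> 34x34
  return 0;
}
```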
auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_d2_9_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 3; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 65; + conv_param->output_w_ = 65; + conv_param->output_channel_ = 9; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 3; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 65, 65, 9}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_2_3_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 3}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({3, 3, 3, 3}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + 
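These filter-gradient fixtures are easiest to sanity-check against a naive formulation: in a transposed convolution, y[i*stride + k*dilation - pad] accumulates x[i] * w[k], so the filter gradient correlates the layer input x with the incoming gradient dy. A 1-D, single-channel sketch of that reduction (batch, channels and groups omitted; the nnacl kernel presumably uses im2col-style packing instead of these raw loops):

```cpp
// Naive 1-D reference for the transposed-convolution filter gradient:
// dL/dw[k] = sum_i x[i] * dy[i*stride + k*dilation - pad], skipping out-of-range taps.
void DeconvGradFilter1D(const float *x, int in_len, const float *dy, int out_len,
                        float *dw, int kernel, int stride, int dilation, int pad) {
  for (int k = 0; k < kernel; ++k) {
    float acc = 0.f;
    for (int i = 0; i < in_len; ++i) {
      int o = i * stride + k * dilation - pad;  // output position this (i, k) pair touched
      if (o >= 0 && o < out_len) {
        acc += x[i] * dy[o];
      }
    }
    dw[k] = acc;
  }
}
```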
printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_d2_g3_3_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 3; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 34; + conv_param->output_w_ = 34; + conv_param->output_channel_ = 9; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 1; + conv_param->stride_w_ = 1; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 3; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_s1_2_9_34_34.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 34, 34, 9}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_s1_2_3_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 3}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({3, 3, 3, 3}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_d2_g3_s1_3_3_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, 
output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 4; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 65; + conv_param->output_w_ = 65; + conv_param->output_channel_ = 12; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 2; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g2_s2_2_12_65_65.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 65, 65, 12}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g2_s2_2_4_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 4}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({6, 3, 3, 4}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_d2_g2_s2_6_4_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + 
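// (teardown pattern: the tensors never owned dy_data/input_data/dw_data -- the
// buffers were attached via SetData() and are freed manually above -- so each
// tensor is detached with SetData(nullptr) before it goes out of scope to avoid
// a double free; conv_param is left alone, presumably because the kernel takes
// ownership of its OpParameter)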
dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { + // prepare stage + auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + conv_param->input_batch_ = 2; + conv_param->input_h_ = 32; + conv_param->input_w_ = 32; + conv_param->input_channel_ = 12; + + conv_param->output_batch_ = 2; + conv_param->output_h_ = 65; + conv_param->output_w_ = 65; + conv_param->output_channel_ = 12; + + conv_param->kernel_h_ = 3; + conv_param->kernel_w_ = 3; + + conv_param->stride_h_ = 2; + conv_param->stride_w_ = 2; + + conv_param->dilation_h_ = 2; + conv_param->dilation_w_ = 2; + + conv_param->pad_u_ = 1; + conv_param->pad_l_ = 1; + conv_param->pad_r_ = 1; + conv_param->pad_d_ = 1; + + conv_param->group_ = 12; + conv_param->act_type_ = ActType_No; + conv_param->thread_num_ = 1; + + size_t dy_size; + std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g12_s2_2_12_65_65.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + std::vector dim_dy({2, 65, 65, 12}); + lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); + dy_tensor.SetData(dy_data); + + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + size_t output_data_size = + conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; + + size_t input_size; + std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g12_s2_2_12_32_32.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + std::vector dim_x({2, 32, 32, 12}); + lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); + x_tensor.SetData(input_data); + + auto dw_data = new float[output_data_size]; + std::vector dim_dw({1, 3, 3, 12}); + lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); + dw_tensor.SetData(dw_data); + std::vector inputs = {&dy_tensor, &x_tensor}; + std::vector outputs = {&dw_tensor}; + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); + + // warm up loop + for (int i = 0; i < 3; i++) { + kernel->Run(); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + kernel->Run(); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + std::string output_path = "./test_data/deconv/deconvfp32_dw_d2_g12_s2_12_1_3_3.bin"; + auto res = lite::CompareRelativeOutput(dw_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] dy_data; + delete[] dw_data; + delete kernel; + // delete conv_param; + dw_tensor.SetData(nullptr); + x_tensor.SetData(nullptr); + dy_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); + MS_LOG(INFO) << "TestDeConvolutionGradFp32 Filter Grad passed"; +} + +} // namespace mindspore diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc index 03feae94e0..47823dac44 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc @@ -26,12 +26,13 @@ #include "mindspore/lite/include/train_model.h" #include "common/common_test.h" #include "include/train_session.h" -// #include "include/lite_session.h" #include "include/context.h" #include "include/errorcode.h" #include "src/common/log_adapter.h" #include "src/common/file_utils.h" #include "src/common/file_utils_ext.h" +#include "src/kernel_registry.h" +#include "src/runtime/kernel/arm/fp32_grad/convolution.h" namespace mindspore { class NetworkTest : public mindspore::CommonTest { @@ -39,6 +40,9 @@ class NetworkTest : public mindspore::CommonTest { NetworkTest() {} }; +int32_t runNet(mindspore::session::LiteSession *session, const std::string &in, const std::string &out, + const char *tensor_name, bool debug = false); + // INPUT(0) // V // +-------------+ @@ -352,15 +356,13 @@ TEST_F(NetworkTest, tuning_layer) { ASSERT_NE(nullptr, model); meta_graph.reset(); content = nullptr; - lite::InnerContext context; + lite::Context context; context.device_type_ = lite::DT_CPU; context.cpu_bind_mode_ = lite::NO_BIND; context.thread_num_ = 1; - ASSERT_EQ(lite::RET_OK, context.Init()); - auto session = new session::TrainSession(); + auto session = session::TrainSession::CreateSession(&context); ASSERT_NE(nullptr, session); - session->Init(&context); - auto ret = session->CompileGraph(model); + auto ret = session->CompileTrainGraph(model); ASSERT_EQ(lite::RET_OK, ret); session->Train(); session->Train(); // Just double check that calling Train twice does not cause a problem @@ -469,59 +471,67 @@ int32_t fileIterator(mindspore::session::TrainSession *session, const std::strin } void replaceExt(const std::string &src, std::string *dst) { *dst = src.substr(0, src.find_last_of('.')) + ".emb"; } -int32_t runNet(mindspore::lite::LiteSession *session, const std::string &in, const std::string &out, - const char *tensor_name) { +int32_t runNet(mindspore::session::LiteSession *session, const std::string &in, const std::string &out, + const char *tensor_name, bool debug) { // setup input auto inputs = session->GetInputs(); auto inTensor = inputs.at(0); float *data = reinterpret_cast(inTensor->MutableData()); - size_t input_size; float *in_buf = reinterpret_cast(lite::ReadFile(in.c_str(), &input_size)); auto input_data = reinterpret_cast(in_buf); std::copy(input_data, input_data + inTensor->ElementsNum(), data); + std::cout << "==============Input===========================" << std::endl; + for (int i = 0; i < 10; i++) { + std::cout << data[i] << ", "; + } + std::cout << std::endl; delete[] in_buf; // execute network session->RunGraph(); - - // compare outputs auto output = session->GetOutputByTensorName(tensor_name); - float *output_data = reinterpret_cast(output->MutableData()); + if (output != nullptr) { + float *output_data = reinterpret_cast(output->MutableData()); + // compare outputs + if (debug) { + std::cout << "==============Output===========================" << std::endl; + for (int i = 0; i < 10; i++) { + std::cout << output_data[i] << ", "; + } + std::cout << std::endl; + } + return mindspore::lite::CompareRelativeOutput(output_data, out); + } - return mindspore::lite::CompareRelativeOutput(output_data, out); + return lite::RET_ERROR; } 
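For reference, a minimal caller of the reworked runNet() helper would look like the sketch below. The model path, the input/golden .bin paths, and the output tensor name "out" are placeholders rather than files shipped with this patch; the session calls follow the TrainSession API exercised in tuning_layer above.

    // Sketch only: paths and the tensor name are illustrative placeholders.
    lite::Context context;
    context.device_type_ = lite::DT_CPU;
    context.thread_num_ = 1;
    auto session = session::TrainSession::CreateSession(&context);
    ASSERT_NE(nullptr, session);
    size_t size = 0;
    char *buf = nullptr;
    ReadFile("model.ms", &size, &buf);
    auto model = lite::TrainModel::Import(buf, size);
    delete[] buf;
    ASSERT_EQ(lite::RET_OK, session->CompileTrainGraph(model));
    session->Eval();  // inference mode; Train() re-enables the grad kernels
    auto res = runNet(session, "in.f32", "golden.f32", "out", /*debug=*/true);
    ASSERT_EQ(res, 0);
    delete session;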
TEST_F(NetworkTest, efficient_net) { char *buf = nullptr; size_t net_size = 0; - // std::string net = "./test_data/nets/efficientnet_b0_f.ms"; std::string net = "./test_data/nets/effnetb0_fwd_nofuse.ms"; ReadFile(net.c_str(), &net_size, &buf); auto model = lite::TrainModel::Import(buf, net_size); delete[] buf; - auto context = new lite::InnerContext; + auto context = new lite::Context; context->device_type_ = lite::DT_CPU; context->cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; - ASSERT_EQ(lite::RET_OK, context->Init()); - auto session = new mindspore::session::TrainSession(); + auto session = session::TrainSession::CreateSession(context); ASSERT_NE(session, nullptr); - auto ret = session->Init(context); - ASSERT_EQ(lite::RET_OK, ret); - ret = session->CompileGraph(model); + auto ret = session->CompileTrainGraph(model); ASSERT_EQ(lite::RET_OK, ret); session->Eval(); std::string in = "./test_data/nets/effNet_input_x_1_3_224_224.bin"; std::string out = "./test_data/nets/effNet_output_y_1_1000.bin"; - auto res = runNet(session, in, out, "631"); - - ASSERT_EQ(res, 0); + auto res = runNet(session, in, out, "650"); delete session; delete context; + ASSERT_EQ(res, 0); } TEST_F(NetworkTest, lenetnet) { @@ -536,19 +546,105 @@ TEST_F(NetworkTest, lenetnet) { context->cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; - auto session = new mindspore::session::TrainSession(); + // check registration + mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance(); + mindspore::kernel::KernelKey desc1 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, + mindspore::schema::PrimitiveType_Conv2D}; + mindspore::kernel::KernelKey desc2 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, + mindspore::schema::PrimitiveType_DepthwiseConv2D}; + auto regb1 = reg->GetCreator(desc1); + auto regb2 = reg->GetCreator(desc2); + ASSERT_EQ(regb1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); + + auto session = session::TrainSession::CreateSession(context); ASSERT_NE(session, nullptr); - auto ret = session->Init(context); - ASSERT_EQ(lite::RET_OK, ret); - ret = session->CompileGraph(model); + auto ret = session->CompileTrainGraph(model); ASSERT_EQ(lite::RET_OK, ret); - session->Eval(); + auto rega1 = reg->GetCreator(desc1); + auto rega2 = reg->GetCreator(desc2); + ASSERT_EQ(regb1, rega1); + ASSERT_EQ(regb2, rega2); + ASSERT_EQ(rega1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); + // end of check registration + + session->Eval(); std::string in = "./test_data/nets/x_lenet.bin"; std::string out = "./test_data/nets/y_lenet.bin"; auto res = runNet(session, in, out, "24"); + delete session; + delete context; + ASSERT_EQ(res, 0); +} +#if 0 +TEST_F(NetworkTest, retina_net) { + char *buf = nullptr; + size_t net_size = 0; + + std::string net = "./test_data/nets/retinaface1009.ms"; + ReadFile(net.c_str(), &net_size, &buf); + // auto model = lite::TrainModel::Import(buf, net_size); + auto model = lite::Model::Import(buf, net_size); + delete[] buf; + auto context = new lite::Context; + context->device_type_ = lite::DT_CPU; + context->cpu_bind_mode_ = lite::NO_BIND; + context->thread_num_ = 1; + + // auto session = session::TrainSession::CreateSession(context); + auto session = session::LiteSession::CreateSession(context); + ASSERT_NE(session, nullptr); + auto ret = session->CompileGraph(model); + ASSERT_EQ(lite::RET_OK, ret); + // session->Eval(); + + std::string in = "./test_data/nets/retinaface_input.f32"; + std::cout << "----- Output 0 -----" 
<< std::endl; + std::string out = "./test_data/nets/retinaface_out_0.f32"; + auto res = runNet(session, in, out, "448", true); + ASSERT_EQ(res, 0); + + std::cout << "----- Output 1 -----" << std::endl; + out = "./test_data/nets/retinaface_out_1.f32"; + res = runNet(session, in, out, "435", true); + ASSERT_EQ(res, 0); + + std::cout << "----- Output 2 -----" << std::endl; + out = "./test_data/nets/retinaface_out_2.f32"; + res = runNet(session, in, out, "421", true); + ASSERT_EQ(res, 0); + + delete session; + delete context; +} +#endif +TEST_F(NetworkTest, mobileface_net) { + char *buf = nullptr; + size_t net_size = 0; + + std::string net = "./test_data/nets/mobilefacenet0924.ms"; + ReadFile(net.c_str(), &net_size, &buf); + // auto model = lite::TrainModel::Import(buf, net_size); + auto model = lite::Model::Import(buf, net_size); + delete[] buf; + auto context = new lite::Context; + context->device_type_ = lite::DT_CPU; + context->cpu_bind_mode_ = lite::NO_BIND; + context->thread_num_ = 1; + + // auto session = session::TrainSession::CreateSession(context); + auto session = session::LiteSession::CreateSession(context); + ASSERT_NE(session, nullptr); + auto ret = session->CompileGraph(model); + ASSERT_EQ(lite::RET_OK, ret); + // session->Eval(); + + std::string in = "./test_data/nets/facenet_input.f32"; + std::string out = "./test_data/nets/facenet_output.f32"; + auto res = runNet(session, in, out, "354", true); ASSERT_EQ(res, 0); + delete model; delete session; delete context; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc index 2ae6ec1af5..b1e90b8f9d 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc @@ -20,12 +20,12 @@ #include "mindspore/lite/include/context.h" #include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/kernel_registry.h" #include "src/common/utils.h" #include "src/common/file_utils.h" #include "src/common/file_utils_ext.h" -#include "src/runtime/kernel/arm/fp32_grad/pooling_grad.h" #include "nnacl/fp32_grad/pooling_grad.h" +#include "src/runtime/kernel/arm/fp32_grad/pooling_grad.h" +#include "mindspore/lite/src/kernel_registry.h" namespace mindspore { class TestPoolingGradFp32 : public mindspore::CommonTest { @@ -78,13 +78,13 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { auto output_data = new float[output_data_size]; // warm up loop for (int i = 0; i < 3; i++) { - AvgPoolingGrad(input_data, output_data, pooling_param); + AvgPoolingGrad(input_data, output_data, pooling_param, 1); } int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { - AvgPoolingGrad(input_data, output_data, pooling_param); + AvgPoolingGrad(input_data, output_data, pooling_param, 1); } auto time_end = mindspore::lite::GetTimeUs(); auto cost = time_end - time_start; @@ -140,10 +140,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { dx_tensor.SetData(output_data); std::vector outputs = {&dx_tensor}; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, 
schema::PrimitiveType_PoolingGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), &context, desc, nullptr); kernel_obj->Run(); @@ -201,10 +205,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { auto output_data = reinterpret_cast(dx_tensor.MutableData()); std::vector outputs = {&dx_tensor}; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), &context, desc, nullptr); kernel_obj->Run(); @@ -259,17 +267,22 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { float *out_data = static_cast(out_tensor.MutableData()); std::vector inputs = {&yt_tensor, &x_tensor}; std::vector outputs = {&out_tensor}; - // ---------------------------------------- + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); - auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), NULL, pool_desc, nullptr); + auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), &context, pool_desc, nullptr); kernel->Init(); auto time_start = mindspore::lite::GetTimeUs(); kernel->Run(); auto time_end = mindspore::lite::GetTimeUs(); - printf("single thread running time : %llu ms\n", time_end - time_start); + printf("single thread running time : %lu ms\n", time_end - time_start); std::string output_path = "./test_data/pooling/avgpoolgradfp32_s2_dx_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); @@ -319,17 +332,22 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { std::vector inputs = {&yt_tensor, &x_tensor}; std::vector outputs = {&out_tensor}; - // ---------------------------------------- + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); - auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), NULL, pool_desc, nullptr); + auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), &context, pool_desc, nullptr); kernel->Init(); auto time_start = mindspore::lite::GetTimeUs(); kernel->Run(); auto time_end = mindspore::lite::GetTimeUs(); - printf("single thread running time : %llu ms\n", time_end - time_start); + printf("single thread running time : %lu ms\n", time_end - time_start); std::string output_path = "./test_data/pooling/avgpoolgradfp32_s3_dx_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); @@ -371,13 +389,13 @@ 
TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { auto output_data = new float[output_data_size]; // warm up loop for (int i = 0; i < 3; i++) { - MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param); + MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1); } int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { - MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param); + MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1); } auto time_end = mindspore::lite::GetTimeUs(); auto cost = time_end - time_start; @@ -435,10 +453,15 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { auto out_data = static_cast(out_tensor.MutableData()); std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; - // ---------------------------------------- + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); - auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), NULL, + auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); kernel->Init(); @@ -446,7 +469,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { auto time_start = mindspore::lite::GetTimeUs(); kernel->Run(); auto time_end = mindspore::lite::GetTimeUs(); - printf("single thread running time : %llu ms\n", time_end - time_start); + printf("single thread running time : %lu ms\n", time_end - time_start); std::string output_path = "./test_data/pooling/maxpoolgradfp32_1_xgrad_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); @@ -505,10 +528,15 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; - // ---------------------------------------- + + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); - auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), NULL, + auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); kernel->Init(); @@ -516,7 +544,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { auto time_start = mindspore::lite::GetTimeUs(); kernel->Run(); auto time_end = mindspore::lite::GetTimeUs(); - printf("single thread running time : %llu ms\n", time_end - time_start); + printf("single thread running time : %lu ms\n", time_end - time_start); std::string output_path = "./test_data/pooling/maxpoolgradfp32_s2_xgrad_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); @@ -575,10 +603,15 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { std::vector maxpool_inputs = {&x_tensor, &y_tensor, &yt_tensor}; std::vector maxpool_outputs = {&out_tensor}; - // ---------------------------------------- + + lite::InnerContext context; + 
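// (kernels are no longer created with a NULL context: each test now builds a
// lite::InnerContext, checks that Init() succeeds, and passes its address to
// the registered creator so the kernel can read the thread configuration)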
context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); - auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), NULL, + auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); kernel->Init(); @@ -586,7 +619,7 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { auto time_start = mindspore::lite::GetTimeUs(); kernel->Run(); auto time_end = mindspore::lite::GetTimeUs(); - printf("single thread running time : %llu ms\n", time_end - time_start); + printf("single thread running time : %lu ms\n", time_end - time_start); std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin"; auto res = lite::CompareRelativeOutput(out_data, output_path); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc index 26b2abf277..650d5be587 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc @@ -59,9 +59,15 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { grad_tensor.SetData(grad); std::vector outputs = {&loss_tensor, &grad_tensor}; + lite::InnerContext context; + context.device_type_ = lite::DT_CPU; + context.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, context.Init()); + kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftmaxCrossEntropy}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel_obj = creator(inputs, outputs, reinterpret_cast(sce_param), NULL, desc, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(sce_param), &context, desc, nullptr); + mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); kernel_obj->Run(); printf("==================total loss=================\n"); @@ -92,6 +98,7 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { y_tensor.SetData(nullptr); loss_tensor.SetData(nullptr); grad_tensor.SetData(nullptr); + mindspore::kernel::LiteKernel::FreeWorkspace(); delete kernel_obj; MS_LOG(INFO) << "SoftmaxCrossEntropyFp32 passed"; } diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc index ad9914cf91..b8164b2af7 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc @@ -21,13 +21,12 @@ #include "mindspore/lite/include/context.h" #include "src/common/log_adapter.h" #include "common/common_test.h" -#include "mindspore/lite/src/kernel_registry.h" #include "src/common/utils.h" #include "src/common/file_utils.h" #include "src/common/file_utils_ext.h" - #include "mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_grad.h" #include "mindspore/lite/nnacl/fp32_grad/softmax_grad.h" +#include "mindspore/lite/src/kernel_registry.h" namespace mindspore { class TestSoftmaxGradFp32 : public mindspore::CommonTest { @@ -55,348 +54,6 @@ void 
InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, i softmax_param->input_shape_[3] = w; } -#if 0 // kernel testing -TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis0) { - auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); - // set parameters - InitSoftMaxParam(softmax_param, 0); - - std::vector shape = {1, 9, 11, 12}; - size_t input_size; - std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); - input_tensor.SetData(input_data); - - std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; - auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); - yt_tensor.SetData(yt_data); - - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; - - auto out_data = new float[softmax_param->element_size_]; - lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); - out_tensor.SetData(out_data); - - std::vector inputs = {&input_tensor, &yt_tensor}; - std::vector outputs = {&out_tensor}; - - // float sum_data[6]; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); - - kernel->Init(); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } - - int loop_count = 3; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - time_avg = cost / loop_count; - printf("single thread running time : %f ms\n", time_avg / 1000.0f); - - std::string output_path = "./test_data/softmax/softmaxgrad_out.bin"; - // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - - auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); - - delete[] input_data; - delete[] yt_data; - delete[] out_data; - input_tensor.SetData(nullptr); - yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); - delete kernel; - // delete softmax_param; - - MS_LOG(INFO) << "SoftmaxGradKernelAxis0 passed"; -} - -TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis1) { - auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); - // set parameters - InitSoftMaxParam(softmax_param, 1); - - std::vector shape = {1, 9, 11, 12}; - size_t input_size; - std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); - input_tensor.SetData(input_data); - - std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; - auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); - yt_tensor.SetData(yt_data); - - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; - - auto out_data = new float[softmax_param->element_size_]; - lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); - 
out_tensor.SetData(out_data); - - std::vector inputs = {&input_tensor, &yt_tensor}; - std::vector outputs = {&out_tensor}; - - // float sum_data[6]; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); - - kernel->Init(); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } - - int loop_count = 3; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - time_avg = cost / loop_count; - printf("single thread running time : %f ms\n", time_avg / 1000.0f); - - std::string output_path = "./test_data/softmax/softmaxgrad_1_out.bin"; - // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - - auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); - - delete[] input_data; - delete[] yt_data; - delete[] out_data; - input_tensor.SetData(nullptr); - yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); - delete kernel; - // delete softmax_param; - - MS_LOG(INFO) << "SoftmaxGradKernelAxis1 passed"; -} - -TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis2) { - auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); - // set parameters - InitSoftMaxParam(softmax_param, 2); - - std::vector shape = {1, 9, 11, 12}; - size_t input_size; - std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); - input_tensor.SetData(input_data); - - std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; - auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); - yt_tensor.SetData(yt_data); - - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; - - auto out_data = new float[softmax_param->element_size_]; - lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); - out_tensor.SetData(out_data); - - std::vector inputs = {&input_tensor, &yt_tensor}; - std::vector outputs = {&out_tensor}; - - // float sum_data[6]; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); - - kernel->Init(); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } - - int loop_count = 3; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - time_avg = cost / loop_count; - printf("single thread running time : %f ms\n", time_avg / 1000.0f); - - std::string output_path = "./test_data/softmax/softmaxgrad_2_out.bin"; - // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - - auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); - - delete[] input_data; - delete[] yt_data; - delete[] out_data; 
- input_tensor.SetData(nullptr); - yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); - delete kernel; - // delete softmax_param; - - MS_LOG(INFO) << "SoftmaxGradKernelAxis2 passed"; -} - -TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxis3) { - auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); - // set parameters - InitSoftMaxParam(softmax_param, 3); - - std::vector shape = {1, 9, 11, 12}; - size_t input_size; - std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); - input_tensor.SetData(input_data); - - std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; - auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); - yt_tensor.SetData(yt_data); - - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; - - auto out_data = new float[softmax_param->element_size_]; - lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); - out_tensor.SetData(out_data); - - std::vector inputs = {&input_tensor, &yt_tensor}; - std::vector outputs = {&out_tensor}; - - // float sum_data[6]; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); - - kernel->Init(); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } - - int loop_count = 3; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - time_avg = cost / loop_count; - printf("single thread running time : %f ms\n", time_avg / 1000.0f); - - std::string output_path = "./test_data/softmax/softmaxgrad_3_out.bin"; - // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - - auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); - - delete[] input_data; - delete[] yt_data; - delete[] out_data; - input_tensor.SetData(nullptr); - yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); - delete kernel; - // delete softmax_param; - - MS_LOG(INFO) << "SoftmaxGradKernelAxis3 passed"; -} - -TEST_F(TestSoftmaxGradFp32, SoftmaxGradKernelAxisMinus1) { - auto softmax_param = reinterpret_cast(malloc(sizeof(SoftmaxParameter))); - // set parameters - InitSoftMaxParam(softmax_param, -1); - - std::vector shape = {1, 9, 11, 12}; - size_t input_size; - std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; - auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - lite::tensor::Tensor input_tensor(TypeId::kNumberTypeFloat32, shape); - input_tensor.SetData(input_data); - - std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; - auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - lite::tensor::Tensor yt_tensor(TypeId::kNumberTypeFloat32, shape); - yt_tensor.SetData(yt_data); - - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; - - auto out_data = new float[softmax_param->element_size_]; - 
lite::tensor::Tensor out_tensor(TypeId::kNumberTypeFloat32, shape); - out_tensor.SetData(out_data); - - std::vector inputs = {&input_tensor, &yt_tensor}; - std::vector outputs = {&out_tensor}; - - // float sum_data[6]; - kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftMaxGrad}; - auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); - auto kernel = creator(inputs, outputs, reinterpret_cast(softmax_param), NULL, desc, nullptr); - - kernel->Init(); - - // warm up loop - for (int i = 0; i < 3; i++) { - kernel->Run(); - } - - int loop_count = 3; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - time_avg = cost / loop_count; - printf("single thread running time : %f ms\n", time_avg / 1000.0f); - - std::string output_path = "./test_data/softmax/softmaxgrad_-1_out.bin"; - // auto output_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - - auto res = lite::CompareRelativeOutput(out_data, output_path); - EXPECT_EQ(res, 0); - - delete[] input_data; - delete[] yt_data; - delete[] out_data; - input_tensor.SetData(nullptr); - yt_tensor.SetData(nullptr); - out_tensor.SetData(nullptr); - delete kernel; - // delete softmax_param; - - MS_LOG(INFO) << "SoftmaxGradKernelAxisMinus1 passed"; -} -#endif // kernel testing - TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { auto softmax_param = new SoftmaxParameter(); // set parameters diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_d2_g2_s2_12_2_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/conv/convfp32_dw_d2_g2_s2_12_2_3_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..304ac956e425d6f2a47a579712594f1afaead493 GIT binary patch literal 864
[base85 binary payloads omitted: the remainder of this patch is the encoded data for the new .bin/.f32 test fixtures listed in the diffstat and cannot be reproduced as text]
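The .bin/.f32 fixtures referenced throughout are headerless dumps of raw little-endian float32 values; the tensor shape lives only in the file name (e.g. deconvfp32_dw_9_3_3_3.bin holds a 9x3x3x3 filter gradient). A minimal sketch of producing one in the same format, assuming only that ReadFile() hands back the raw bytes the tests reinterpret as float*:

    // Sketch: emit a raw float32 fixture in the format the tests consume.
    // The 9*3*3*3 shape and the constant fill value are illustrative only.
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<float> dw(9 * 3 * 3 * 3, 0.5f);
      std::FILE *fp = std::fopen("deconvfp32_dw_9_3_3_3.bin", "wb");
      if (fp == nullptr) return 1;
      std::fwrite(dw.data(), sizeof(float), dw.size(), fp);
      std::fclose(fp);
      return 0;
    }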
z5Y{F!=+6xRe!GLAL=uo^-CJK7q0rbcE-F);@$IjdO6&&Yu}#-KMNHozy`31}r_k7;FQ} zK~ZfJle0P-&X~-BI8|8~x%L#kWrxAo??$+GSb-XLt>6@WyhwiKN>b~DV`OI!P)XaxswTp~K)Co{ZPNr_x454muBz}J6 zOsfl}U|nVmRAsqAd!HcpslO2#wk?DGSDe6IqJoNvN6;^0nQB@x)t^w5kPgLWsviEHMTpxphu4# zrZ?#=$SiB2AM!hy+Po2nU+M$uvLbM>#g@61$3;I8OMEmhm>zBQg#6i&WO40!?7Se3 z4TcPRT0skL(Ncy>9))E0lqoJbz8y>lGvI^TZV0ko3|6}|q5tDurccEksyxqAKl%Vh z?QI}op+D9uo`G6PC6JX^4aLf3G}|wNK)eX&U-3P>HN1nzhdN>Lyf@5&Rkvxt%1GGP zFdvm0g}Az^oh+kRUs!FyVrA~&v8KriS~GVe&V5!)HV<7O&5>m|ri6Go)rdw9YQl7x zII`~MzuSASTe11goMBp}B1oQF1nOm5*txeD@I83QEZQ5^ z-`%l#y9$%heudCWwWjrTg6zV!YhY}p0{Ooz=;NaWI8fOKI}R=2Jgj^~7PvkoO9KXp z@5$>Brf?EH7Zibod@Qc7-AFg+dcyqUQrJCqgG{Xz;{G;xjxJ`i>MT5^K&~+hx<*#u zZjTHiE|Ngs%rT(eLXuob_8po~Yex&ET;OP}6mAk#0DEuVnNh2(tF0?%VhZ1*s#P6( zx33W$xgi57%4Nt%7=T)}HgG*;eqldxhpn;#}H5z~;XkJeZY&p;z{qq}fP7zMUOTK0blce`j$@o@wJ! zqa^asG@EVSwikn5N^#{6nc~Z|2KwCm4V{&G4I;fCP_HIg5Zhb@otLF>;FmARck=Xp z{dY|Hn}R#H?#FSRPJ+b&ZIjxb4RZH(M=giVcK>HUQbuJ($J#4sl{jN~QH#cD| zm}2(z!|+l(6CY`XQt!l2m`IGmv`}s64-EjBq+;Y>TSWJC+JKw-IW{Y;oSgrof$vV7 zVvf)9!D-uqnXU(tB=zZDSkN1bBQ;~Ni|;Hx@YoESrOx50^CR?E;UX~k_<)2*KPLb3 zqu6EMp?D!-1}B2&LC03-(@od1K&oOPu30V%^$~M%ZNdfImmG*^!`@NBbq~qIX~t+! zG?(KOkwQ;fTG8kS5zr?u4)5&Z87co;M1%@)tgZ}!>W+5MZJwmwb0g`2$9AyZ`wKlv zSmexL3Eca}h6=|vX!&AE)q8fpsci{pZqP>#_lHr3H#`uovbin#<4=>n>8Y16NT=`X70c`G#SBMG%`3XYwmP z668AsiS6ahI36GjmJgE9hd%=2$)J0E4U9@^s1P@n9Bk`%k`5u?REHeRO z3U#pOMLPS3eiGFAn9y_UTwvbq8SL4|Uf}4y-N5&%nY{fI1oaY4Wct_3K<1yPs&eBn zd?1N;hKsP$Dv=&3_)8})IHF~^54kkPf@y&zxOFHIX5M;5`t6p(%jB6@^pMcu3&rGs z`3P>_Uch4*e{oA;7XB4G1fg}W5PqD6QS(9?vh5|$AIswJT3@m-oeMMOUL`?M`Q*^v zKr-$A1T6{kK-u3jp~SKp9j6O&Y0^S+i^l`@rC-2V_X{A`o=c^x3o-QEMsm8B1s-2D zpqZqTyka4^YLtZUiuM0{!gtIp+<7EQ-h|#`&Tof74_C7qo zdUGw4j72_%?4$dz_w630Fgh6GO5AbQW&aF0ANyF#qA z$&PSYVskhLJ#4)&a?=I0*scV>S6V{sW+%lOFjV}VX2zA=l% zM*jr3d8{15#E`gQOp1OBmjZ#?BGg7v8cv5w(I?NB;Ya-rFmHZHysrO1 zLy2=ZTX7Y7``*FC-XDyFN;P>iC4?XA9cXgE05MB10=K>@7!1CSfeY>7c+pe*gba-u zt;Z%&ZP|c3Rr3jLINl_yi;aJC ze?m?0es*h>_`zFn98$>kXc>?)6~XU28|lrSb>wo30r+RRp?>xW__oxJ{b)>+ zQRo(`0dhPjCCr!`^ecFtl(Jwv0c8Bd_xCT+k|5Z#qJoUQ^WcXeZyd<>MW%dN?dS zK;wps>GxO;*qYXWo&g2hxrb2RVGn*s2Lz5VH5m6J;DO^t*Atm3u~ZDrkB3Wl7=&; zJJH{YkF&K#ko!oq5N|{`gT8eh9Qx%#2Se@=O?gAK9M2|l-tQrUH9-$O?4yBk17zCe zYLI?tjn^Be5oNyx=-Q-+fBi$a%WjOJf5=s8{$M-pT4@N;y+7E3t1rRvSAsAR!ppBd zmQw4k1|lLLLQ7nSP<-B3I(6eYT&d)s>6kwpd%p`^*|~Idx+Fs5HuCcH6kI<$i(GU2 zPBI(}VWRUCYnSCQsJA@{_l^|7>>(F8IJ=M>y!j731s|~=E>`Ap#q?=!Sv)SBc9onr z>u1-7?4_nMFX-M72N0Pp#F>;I_Yt=&aF%r!Rz~Fb&6go z3*8Vm+OpN=26eeFlVN>SWhz7*En-%fspJ_46}=8Um=Bort1kP4+%aBDC`Z88Wu z*Papwr(?R2R0OcE(gvxVS zOw^$}q;|t|Y}rtS{Cnnea(6#L%h7spo%fH?F1StQoJ0XuOL4@Wo`iidndrVo9xpa$ zVRc6UsCygW+g2-_dMHW#BBXF|O(YnfLkt--M@7~h*5n$7RR35+^|Tjol$=a)rj#D; zd?-P7p4NjKKL#OU-*4vMlR$9pkRY`gT~y=sM|yg~l76ceN9z?8c%R2M=N$EeZ8wXl zm!KHT=n>+UFO#L#Jm0P+e-m!%EQaf#Ps%-B<9^#-dTVC_oVXRuPWk4JXY`Li`uTUb z%rSy&vzOrPAM#-dwM@gnS}D%PB5mfRwGEt~SBX8k9qbg*l#XQ ztB=E1o=@Pl<8@N5sREnipVOiX-L&q4Ikbc=;r?rg0Z;KTa^vMJG&iYcRC~;MTq+rA z?lobPQ7x(5)&M~(q;R9jX?R^|$G)`;;GTXp&hi#y<>np$fd*M}ASn+XOD#oLMOl{r zJtZ*GibolNK&)6Dj<(@P==i4@@Ge7!GpF|td+TZi2q`Q_536z-zbF$1HeEz3m&MSf z#0UHjJ;8BtA8pnfM|@jK6%A_H_J(Ru7GZ#YH&(#Jd?VbHEexAZ=h2N{(#hEK?Zl~U zHWt0_CUQEK%;BYVR4eZ_^tV+q*Ka68cF?D~vLF69CXxZSuI?pX7Kh<6=PnU;7R0d6 z1-wlC1UVOz4|y%Rtbh_#ylCJFf{Qw+?e#dkqS%dV7FU3JS`t0|U7O@ci4grd1v(;< z!4|GL3hTBe)6L5sl1Sz+GjCTPDo&4umK`o2D}Nu)+6j_N7Kh1!<5nQ|xs}9jH;3`K zx0pY#6MoQ>j9Tp?%pHj$9hD`#EI9%t1b)!J!)c7+*mJC1wSc>Lr99^UT}2+Q7#mPu`d%SX4pmEDQaQCZY?_1Il_GYl1r^0H^IBT@8Jxpz#Q+L z__9|6Q_Omydi*oZ%*vsEeEvXvqX<{urvtB>pCy+@B2hlQgYxgKqoJFhprUmNT`cbk 
z6**}j@%bc*@!2D3o=-@+EpdIQ#%*<<&UyLC9^|Hrkrs6!Og*y+PI*h?-v0A&p;Z>H zqCU;oAPL9CqHsx58R5=}pb6Vsm;>dPNS0F;Sgvb@=)?%nTN#aqyF&5g$~Md^Yaqwh zcGBh_z4YtOy%2aa0o=aGqx!9bG;T1QxEz1PK8r|So5$n!pwsxoOO|~g{Tfu8D{>6h zC)erumEkLqO>n|^7v59NppwBBSo0%}DPL2BvG4i09-pD z7i(#$^*oMGS_w{--=>{Ho2f1@i*C-CfX$Zapc|KfY9@TJwRxBZNzLYN?X!g~t2EJ; zhB3ZXu2gcF7$yY%hojv;a7N=J=ES2DC?kFvuU$R>^^UrXy8cIYNboOGk)VtP>w2kv z_7eK(nj(hPz9natoWo3qW+Ew6!PpmF1@7GK(3Bvx}5S}zO2@+w$c6@tEv zevr9tF|429!>lwH%>C~zCOQj)u9zy$@~Z*!nQQ5arE5uMY73-^MG@)yCuy(s4ES3! zosL>oQ{Uzn_+|YY^3sE!%s;gr{*BM1Xgo~HgPzx!b&g>6ELD`+z7i~7&w~A>p6In& z8#aGep_>D^@Rn~nxXo9@cvl-3$eRhBa=vK!ED!qX@8F%va6GGiiImPLLsR1mut=ev ziM%07Tt^nuThYzr$*K{MynT^}9qeus4KaA=Q_7CX7i3!x%t5dJG%$N^F3f#UOY1@k zXqrwwp1fj<{3~w3;Vtq|sa}R5{$9+xbw62OQp6FqWfLgxBrlirl0QFd;d(|3Xe9lH z(PT9=dbSgH{j|aNpTj7)IwDJG8V-cUGk^J>GR;mOkVdtVBmF+aYxiNCv*9%8jjO=o z9D7=!mjao~W^-%{xwy@*6Zyv^xN_&GVZ;0Bu!VOgFO<`PZL{Cd4T0CNGj$J`{hLLN zJP$J>QWu!LTed)dOE}!?t48qzX=Z!WBuN;}MwfrL(8<`D#mTw?kFtZQPGcog;QAju zl|D%goLg{&mlKta?8Yp-49~ybgQprhX!4!;;Jxw(gmvA)5wRt>lD95Tj)+EZm7rx!Ir%VS723$Gp^iOQ!9_=pSj0bN>mNN$e;?PtS6(CRzVrIHzv?x)EvEvD zmT8dC(q!DHse=k<;xR(Y0#5yohG%CA$(wEO$->REX>MmJxJ1g)!LnnZdL@xKWU%o+ z*%i<+aDk;UpaPery{Nd;Zql;8loowk0ZG%2(WNUS@h2+@UG<%r({I+m`>GLo$j=c? z16o1(TMc9?eAc+1uXG%sjl$m@5g+Zs%mPJLMGS&GwBDx4;pOx^nAlQ=63!{~0FMvE zDM`XSr9_(Z_6MWd7>E0VrLlZ?7CFC55miiF;efV2x9Ox39PE2YLR2O3l*>yj{Jsq? z`b%)1R!cyGiUVF*EzDW{W)0Q-GDVzW1ZFI|#3ZIF;XGk$SpMTFbISMyt@0OxJ*#$u zlG=YX_P8Q$n`(m>dy<&>?lYlVa|gY>Cm5SXE@I)G7(fvqC51E^O@dgW6SnlIz`t$@?yf)5H2%8?xR@(}RaP%8Te^&1nl;-jF1VanUw;bw zRg>s*>I>eh`ync{8+F>sVD@%l^0tbP!+LQ5)K{8KQ2r8xP z1M`;+lOM?&Jc2sDirW~y@)Blx(dukIrWjV5G&Mk+h-r!>;?AS_s-2-5t*+)J+S zu%V>*e@3+Y$@#^R!ji)UQnT?F+*j z@)>OT8PmX6XckIm@N)(=c^$iiTx`}DBul5nY5#wR@phFhnRjj;lpODdQ&NRgrqdbB z6wI-}Kn%j9CrG8pHMkdV#tx7sbmPP(lysKo76_JLb<06q|62|AFJQscV@K4we2s{e ze8bvztMOS+FBlwWh>55PeX@E!eQY{H3<~FP2E`;Wu|geRdb*>$|2{mNYlw~w?pX2X zCr#enf@fF@v9@;&oZO*?8Xhflq1a2t;KmK0C6@G(?;!NI7va>U(-@PS#)@z^fwq4s zMs)lf;_m zg_6{s;xDaX`lK@fmP?hBKi0)`XLBTv3B4kX&Xd$#RGy<_5ko>&sDf-rD@1NoAh%za zX!JXEv?D8yAX0^yckg2U=F3&uKo5i!S z|La+*hg0yPq?qI^YXRZUGg-F#Td90o3f-K-qRMiMxoMq35U4O0=NiQW*VhX#cksG$ ziyg@H%ENTT*8^Oh@x#%7xu|EFNz6A=Dv|I4BRNYUFl#yrKU{(Lx`pxYyfLVeh{5(| zQSO7}Ez~FKH+J6F#YtmB*t;whm~V~%^LF#R!9!^HbB?$sOTe;HF3-y(lP>RiQgP*? z*?6=9XG%Dce!uDk9jk)|2DdbWD;v)sL~Fas|AN`UY*D_i*h`O|T!) 
zhu9sO#IWiR*#BGs8`r1;NJZ7{kMY7ePb9gctuYwX#OsGz%!5n&yJ=+TDVX}fC7X67 zLgxtwER|l3L4A(Ull6f8^=AZewXY}ByUNM>RvEaZcY<;4Sd0%XOvs#kA?|^yT8$f+)@y}}`u`!<_ZoSA#Tw>RrPA5qVB4!4ML7h5R8p0yre`fV#L;dQr%vkuc6 zI!lRYY3STpfwL3#;(D7LIFh;$y%ow(=5-;=^l!j-jf?Q+(NuUPQ$=zG z!&us)SFzhhnm9g)!aU;~EE4}nYCH;|G298p!y{qGvM;c&x&&KxnxdN41G4F-6}?+8 zK@(O*P_lmwhMm@eK9ev!7t)0H>_*^mh-wFf!**I%m54$Sk4!&MKlUt11DAF<2En!e@M2V7#Xao9%vr!A%29 zu+V|dyP51wSAXNF|9+CcGpg}F=V(~}H=B7na|NtU7v%oc3WFwIhmlniO$LvgVIKXZ zC=;HDqq`lTuRtCTu9`{i6y1j_ZI|IidJ^mrlERlI7chVF9^t`lIJI?xN=Qv3C0QPz z^u-jXOPv1yjIXYk#ku7yiPwxz!vp@0OFU{rek%q8|Qk=bU z4{Dv-Nau=QAuEP2V;ui4`mweJL?xnVOP&|9=f{IZe=WYNtbrL$@q}U9(G<;p^yRr; z*uQfC)_4a}q4VLmcc(3_w@iTldZbZX*A60f$l;dmyJp|z?n6}N9rCtj2B&Oz0r&g$ zL?{-^gNCPpZ0)Qm=lHZAl#LNynrp_}bfaP8fmr|8N{1REN#pFxl>E7ixgRvi*-x3!8hHY* z9rs6(qcWuDpgtV7;>SCmPT|N>dCs*HlK4F732G0WWQv`)gMwlfjEh~wlH!Hnee4;U zyuU?-jZ$HTz(I1`{uz`dOoRU{UegxAO}NRV6HdD;b3Vm5qJZffxN+eU6D+R8adwQP zmE|YvR1$))TGj-NZ!X0Pd3R`dt16USyohzDWieB=1GAGHP-HI8|4n-fN+Vy{_eP`Y z4)WNa(2aM@rNP$Vw-ywZWj3vl;lt+u(`icQQFB#}X>{g`vAz_^?O-6j+Y% z{YfzT&P;@)iU!i9C`uhA6sXiQ0pb)ljA!!h(UcFs0nAo1X2-%<8&B*>tOmLZrAFs}V8vQ8dOpz75i24Km z#&W27bS^F&nF9gR2jCg6FLTygjdHG((BE0-$WNsqIB#WxqEm~hchgOxan}ys=StDi z6SZ*VL^gA2hAb{fDFMUUr_2MpSuo}vRd=yN9A|ho()uJ8%&{=XKU!5N?YV;v|Fyz_ z7mHxO&pcc%t;>71Ax5=fAFkC*$ExNxxao%}gmf3<7N@VI&M%jUEl}dty}eGo_;~M% zt_=`9sYcw+NrSzI&SP-RblmxNFZdo=gU8@*Z<`bGxZF#3 z1)robnv2{TfGbD9J=c4S!hjJ_?5frMTX?6IdYWLw+SKC)!ag_T&>6%vbNgw@a(wpX)`) z5q?9s5=)8Z8)<5ir%E^H93*pj-5EbQ34G_sy2oQSck)9!UEeRywF()a#SMeR{aYzqOO3__tc6_q z%MFeUdZKgBMd%N_PH!6zz(LyxSWR|-^}g4{fhcn2MwWwXT?5vi5&?b7NAy5nH68wU z5?4mYz}i9HER1!4MaGeY-%b^J6s;kD6D6)gZPciJHIvJ~8}p}Wv$16%_h68{>6tkT zF=eEeE~{;$jae2r_<}{)-#@Ycp87y9G_Hl=n+h-%5f1^k>@l{X8okn0;CbS5k{TY4 z-y%c_H?#nHdwp(_nPtu>^O&-pjrdt7hrQ6U z7n|?I(~M`wY1sxr=$nv)K8@w1$U_$TE!KeH{T4dp<%{S2g2>oi8x&6A#iDbx|hs=#aWPtY6$p zD)!8Uo2TxA)@VYV)MpRaXUHPz5vf$0ISgHrd*IK*y|DIV8h#SbgC5m&C^IpPGUEeO zSVxm=w;hKy8&mPe<|y2I@Ejx!%q8Jram>Gjt0)u~3!HdSJQAcz-p_vuc{OuTchRjn zbJH+#wND5#Wa8-K{SGMTAkJm9*OF^rq8PXKWQJ^7ib*BcaLybt*6N)v$ZTD88WMLO zf_VMvoy(u%Y#u{$JR`tXsv9IJ-U-lUnE^2}L(G7t1hlWa0-o8C;4bunJbC(?(G|`l z3Gd$V@-IPLaJGiLdEQO8pYvdUelZJEa$Znq-X=`)AeVITk@%_bqt&WfTUZuYlXtIQF{fnbdF9Vt~93GoSnrn)tyVUzocC zchOo(tZx%C@Pn$hS1{{CGtqJ(uQNI?fNh>sN%hug(UY1KGS*9Bb7MPUHSlv@#cOaH z7X`tJ431fcUI*Ckyn?<_Qcw_ambHKGOR~PD8jMGM!7M!$S^dN4xL_STbiN92Hw#m3 zz95`jX-xMjF>Lp3ypCovPZzpL!mVi&pk%%Qzb|lunHI%(wX%dUyjwukyNcPdk;jNH zm4*9pNMKV8`tPqsp~pige{L8?8ykpS1H!)6KsaS$M$6qqfnWXt{nrzN=?DLU)BL3{ ze^nE`e&;F`b`b=~JpnU%XK)5BhjEwYYuZWLP{Q>Zth}cH+0A`q$)jKtE|;WF(i`h! 
z1#8Hr6n~VKKSku<@cM99evzuI+2oG-Z|tK|91m9^u1w`II_Z9l7QFHzr9Y4)SM0%g zn%3N(OYpJ$Qc-FD2=!xR{fEc{MO zftb8J81H>ere!TV^KFh|^W!g& z@z)6!Y{|hd*=~4Hb0?PmX2b5Y>Lgqz47Ip>;W6tFn!-~mRI9-5i2DJbMy#38eRp^| z(-bRu%^}D2D(?J|gmrGO2vfEPMqc_s+R_HPtvZ3{nRP+qfj#Mw<7Ms*3z?yx%dl}_ zBB&{QlRAkEY^#^zZWI^bN*y>1af(uKR&7?@^}Rr&Ke!TIAAauo+e0w6yo2yHC9rR9 zO+%HfgeHj#aWCH4NW(jAn3UE0_*apGj{{?v@S;7eJpM3nNzAF!uy~5+1DB$G@ny`~ z+==PG*+g&D9X@I|GrE@Lv);hbf;E`a9R%BQ zOHka)8w)feu&=+8Mn~quA@w&5H?|UgSI)ry)@0H4s7%y*^NVGc8HYC<%Q3FgpR&%D z-oCEWOIdq&(}uV*nCpKCZG;L4N;%*ilSjm(Lz=UW$6y<4~d*pu7}>|?K|0Y8nfAXFW~dVOIzD;_4=y5To3j}T|GnB_%F zxnC`hfZbO%uJaiJ1?N5JQ(XnmXFsClhi;-#>psA}%_O5WlrDL8ko}sI4XgF%L9nqr zx3)VO?uOpSeO4?~lOu3(;tNr}K8Kt6J_p52&f+W*!;p zu*D@6$%3u4Z+9f#nJ|Q{evzQxY6(o@RtWi=h{KOqWOKR*R()82QZ5_-wJz+e=tbl5 z)%adXmOiZ8#+dg1Vhr1VfMu2jY%_cT{d`VnIr1EGp9g`0`Dq$lrHj?x(by~t_<|Ki zyKI(%e~A=Esa=I5W#zc+MQER~$l5uD+$$9n^r|W*g4AnxK?pFl@ z&-7{Is2*p{eQ~(BVG*|R@qE(Dcua8%!k%6qW>d04XnWTRx#vSauc36X@xff(Uin0 z@_5T1x^j9R%}Lxq?)aB8N3%oW!q`XNes@Acg}e0OO(R3No3y=d>ImiXei1r>|mHW2GlV+!E#8O148YF=d|Mx6621!-THb^%kW18T5=Ef`ysd7#koBhacv# zbq8!PqxL2~n4yefzgd!2zl?5g7-ds$hhq0Pb29L?lmxaS>C(A~1CkBQvE^-a@6~hE zE^ra9cpFJJj?_Xp^M)y^9ii`*`H>F~9l^_L8Jf!>);zGn{umutFkc^jlwDx13+F*# zelDFd|0<1Hk_*?~#FBv*9dP!_HxQjNO!t_$(BwJysDyGAZax1Mm5!w1TE|3cxA+VO z^Z$^F3J)|lHl*6mBbbbGfBLcKIr=MdedL^CYF}E4yI~QE*}5Ye>kSI81Mr2`FkN{v z1Z?w7V7_cHbR=2e5&19(Topp+AALd=`!-=-t2R1>_2Yk9A7RPmbEq0IfY0+LW0^}0 zdEw$ff9vu|z|R+^gsgQXMFppZ`<(p(US$&%=Qte^}Et05V*)>pN9awXbTR7vd zK58t>LXF@N^2lQWq+Qo%9J9}$9rFZsN($g*ZXYw#cq^RUD-BW$zmeRlbHL(JFH~iS zlg{ic%N-m8_M5&ZzqVDZf8#U{A>^BGmH>1GNe7ot+uBG_`WlKkB8g4TQ7gY4@D z%x10^H{WrN>Z@799-a`|9Nh*V=RQQu=VCC<mVa5$$Cf^8zfD7Gn^*t=XI#l6D( zf-p};2q0|>H*y5^BaF&zJ`x`jq%g-7sTuKI`V9qB1)zhVDG27 zY!6(5uI0~}(fCJ1V$>5pJuyR?}44k#=BAGv`(QQbSx4Y#M ztZ-g{^ieQQLnC@#C;|Vn#k5s2k(pN}z|XW@fDNl0;E~7;YMmO3S6;-U>zg5ZR9l}+ zK5U34D(Q5)ZUM|2YoswZZ9(A17+I#NhV^EzasAK&kT>F!?NxfH{VNm2Qf^?2*&s1k zAiz6t&y}08EP@;3Q?dMdJzk5dvt-mJb2(!*{F^kBxIA8t)vC*2jnffGiWbJ}`o~~n zRu-LY-4EVQT;6{9Qxfv3mWn9llCEk^7;;^N73oS;*8eW)e*2f`-q+ww_df?&7V}_} z=ws&Rvox}C@in?sHk`c}6vn>)okA8ph#|S}B@~6TV6};G17<#;%JLtU8qOa$-9?TlWRc z)FLq~OBUH@&S1OH1f%WFz$MW;SR!!^g4X2F9HmN}~Q*ceZT+#}&=z$J9P`IZ#SY9{`Y8Fo0H2D!MS1iRE?N2c0**~(oFPvRAaS~deUxlx-9ANRaUcCE57vm>R zg7<2_Q2LuDHhp@)Twk}D#O`au!|ms>*wO%f!mqFkUe-YP%Exd@n4!Nv)KMRUP)O`j z#X89_5`1fv7!e1}tNRK(Unxu9Z(T`+9gR_^aUOrjnA`tWsL>0>0p#E4M7*;0 zISrCugsXi2lE>flxxR(PPnT!Hnn6{badsi5+jPLz9a(sHBpNrR&g45z`Uyj^r{Ld1 zf)}dPutlk#w5?`IX!<(r{!~aRKd*$g>AhsRI7`E(rq(4rFoi$g9uk{>;jH^;7~zP?o2yCVku#+3m>v`r7sHZ+UT`Ji0sZV;L3DUc z$nVgFi=yo$OxuMdE-sR$!Esw1 z3d)|rX1Ph!w&?<{YvLkP;Pv~aZandSThXV%%!o&C^$0_k0IT*91V-QIh^ z7lZl0YZW4$m*(K<|IWZ-eJh&b_<{b^ZDsE+ONC(NNkpkG5+yeuL@qnZHKXH8kqHdGQpW6d8Ay$?0~c<#t{_{> z-c6p$Ke9y`6D-R?SW*h^AI@S-yOyJfq9rkzR)xpX-jR1g@gPty4)>L2pi069TzxDI zhGw6_8DU$&cxFDi|6mR@x;>=n_fO#I=tz#~n9AhCnMk) zjJ@kl;-f@ga{ONyhJOslt!5P{>r{&F2k%jh7Yk^A@+|nW))UvMI}vG}E)W@=PP@$o z_=QCrDV}H+*Vio2ELa39XdX8CN74EN6xV*(1>Zt8q46aiXp1M~Pscpm@S%_V zYZK=^v~wW^ojv4umnK;u+D#_e@QB^MFdUfefw2-Aj6zWk+_cu zKa^o=_Zhlray8e_@Id{N3)5NANmo~g(l@_2{#VKk%}F%Ay_${>HwjZ?-EkuOw}9R3 zorRA%f5#u^T#&XrLI-Rg)2Ht5=^BiJG4F}w=%rUo$0i}vbqL0--)f1C@+Fw4xD@=q zaBSeKrS$#Wa5to5{`VUWCaezswbHO80naiBW zqAcA-&i49(R%rt4_{x$1w8JW2XQ+MbMNU>vB=f?ek^Xm-#J%i*19}_Ctj98Z=_B^g zDJ2HKU-qN_Wt)qp+ zqG&1ko4yTmYvh^TX;nniKL@|$R?!u~+ek-|CmG*5iXT5lu&3k~!F8uOP+_nJFNb7< zj8iT=8<|f+l2UPciywa0PlSE5Ch$XRh57lD)ga@3BwiQm;C4?}@Z?7YQq5n5$-O49 zS^o@n`h~+r6$yB;a|MJC7r_ky9scH}{a`AbgHJE3gLtkOnYn(1m^Vm6{j-{Lt(hti+Ag^iJQI1pioNRfl1Wot0g@Zwkcw z4=%GV-D&9WV}bAHP2p!*Ibhc4Oz71R=85oDz+hA&^j~y^<3ASD^4e0yPv`_ObEyGk 
z%kOmN+|T5V%q<*?FJjuyr{Whair;O;skL=IJYR8=740ZR#@w75g=gT(38N&?K!|tD zK@QjXpGNbDN=(e=GI0%u;N)N*IE#dVgWC^Mm=T7_!q2Gn2T{7gCk2ML`jIJCD_C{! zX(+wZ7@Z{+(CCx}Afk4h{M?>E9H;F?spUCvy{dxD^LazxYv&Plug}nFYKadX%!3_| zQb=#baol*oj3*PTN}36qY7YakW?_Qk$Y6lEVnat08_mZyZy+_67 zeYL!^XEp>~)`byy9$KI1vD|I;l({Bxov>n&Z1C1FE;}A$dD%t-rf$iF$rB36g+s~o z$dT2sp)r%H_;-`NCmeBuWD3^ZR^XTKmxjGdw4f^~jMEj%L!o(OveZ0p_zMR5 zJMr7rNBEK_#4A4SSbN~oU`#&YX5FAzTv4iX|uu;9a4{9SdC{wFjG=LaPbx66WX z^g$F^HjfX{9)s}IHI6LlxrUh!grLCb5AJE2#h+7UhNpi%C(HKr!08KKIQu2XdhSSp z3(e|qYd{3@lA_?{`ln#Q{XR)f>Vav~-SNM|m9VvSE34TiiY*FScuY|j>e~WwY2jN) zR1so-%-u`KKOx@Ezf<{_zvk3hDhl!BO{Jmb*HR=cLrj2LC_EK=52Nyp(6apmjubqF zU=0m;cX|@EHCIvp#gVXijvcd6b0%-4;t2VhI1@}P-N^MeBxQfr@mEXq(*Z7reL_0~ zavxKSQTWL*R;P)@hYHRyr;E%3CAdCVg|FTW(q$_8yvtW2K*eq*Z}}c|97iRT?$N;X z`U7Co-bGS$j-zc}E>)2z;CjnPRN`GMJ{vl~E-I?0^2!syc5nsSHLfE^;w3?&?*) ze*Icb6&mJ}P1&7fM9l%W89u?rv@CRg(Mi*`ltKIA3ACzDj=ym+K+!~7=qcwsZTU&? zd4@dP`71}|H=eeflO~U*b1cB&T^P0ZVaX~;XU{#f#@s2Qc(DCBS({LXrarSVAkqX* z4M)+=zh@z%z?!ahO2OG1PUC6!cyjtO4-BS+0ei}noNqOT!CnKZ`S~(cO8Sbw8Esxj z<~bN%Hf(9QPM0;GXh$z-rZIo_LO$1M&^Bf^?%P*AOr`{^1^m~{PzsJi7e@{_>uU_XEvimzfGjIO*(>WszSlS(dt-BLJd#WTc zEFZ*~N8}+#&7Kat{zH5`H<6K`L=t{P3ZX6i=-bOkRjCsl z{jv`C%vR^ew^z|iMs^Umv23s|Vt?{LOT393K}? zc}mN=^U2MG9A@SGIuQ7?2tGu7WIr6G%zBa62w=(!}0_ ziDY;r2@bhFXIc&_!jt*0uscSWvEgd=novWunJ0$oJFjs5kZlOfXJDE~9qhS%2()$x z@}=9iLir*~w0!)H%$~9jf*!vmhEqp~>CzyyaEhQGzBkiC34Wc(vVU~J=1g3huLU;h zw?LO}z;1s%m~p2OHGBrZt)QON_GjUc`76@za|?ec@X5%ZY#bR>gB)HF>3=+d7oK$v zbv7$w`lkwdSIHlmZ9=igOam*X-^F&_Oi<~H$4!l%#HB2g40dYsg>KG87fEG&yfKqB zUt0hVN^`iZ)>VAKj{yg{SZI}EVAhu)P%7x5nsKMT|~{V z2U_lX_7xIm8se<(Ot9@}rC{HQ2HLmq<@{%4ckC;&`nEeB*v>gs%2ToE@+Tng)k(Qj zI1Y}TgB7ao={a9VzNOy=HN!*^tK;39I7=8*U$Z`Xq#D7@~^0AiwUwI0 zCEnD!OTMU{g?l$j$?3y$i0_KmkDh zs~GbfV`(YmyZ@uM-2*6Om4M2D zV>CXnhkiZn$?Xl*aj;qm_%H5YRXu}=AM@}KIY+)HA7W~%dLW_k0KMxK&w&+ zW*of&(}p;w>=RLXP&|zJHbtEJN!*5*{r2?x*GyPh+7HH~8nnO!$=lr}#P3NBnfB)! zPWRr+1V|qNO(S0{H#`Ua;73*O+yxn}A0(qP8&8~6}to2BvFu`^W;(CFl_~0i`{}fG1l9!n>D)LE`{D@K%WL8|VVZ z9>>A1Er00Mli9FVt(AIY%*4SbA)w%{fQx5UL$X&G{@7y&lcoqmzwT7DIWPlO7RC|r zXb-%arb&F(wt@fCUzlL41jC1r9$veSIA%4n{B_ew-eN!c);WvawBMfNf5PDuaIS8J21VDtp3tDWlBnBP|#9);aiKwq*CA80yai#t6@Z&|&-5LZkQ*9t{8n?@G`3~DW zlVFf{m$a#rQ0u87pu0)lyyKZaMqj>w%&`%om3f|BW|4}w@{Op+xm(oNY>b(;o8zGL zlc>?!3H;;Hc_^`A6#dlCaG84vo}cAaGSH($6f0D4h4&g9Y&=V}o7ThXd<$}TX9(V! zAOiPaa{W-%9Vj&rDBCN zpS{=yt?=TR6e}#E15f6SkhUKO=~w6II<3#DFfeQdM_U{qK`IG*Pd|pN?le;Ft;ght z|D+}JHb9{C7_BZ>ke!@6xT%*NH3&~@6qDs6gH?*Mo1D8<{vqCMu zC?dA<7f7^7@k0k^V>ahpE;en0{J2BVbI%)B>qd}L#X9QCIdC^9Ho;-VGr0EkELb#e z7ue^xGySQOIG$cf(tZcvoJVU0TksD znD_D&*1T1rt!F~WxbPC{`Avx)Su6;y$2pcX(gON==fNvCFVHz?U1KaWN{%QSP^Xnq z@a$SP24ZsESW0<=qMHLJ>TM+vL@QMg8k z_rP%`H;=7p0=gd%gRS;wnM2ymG|Zb~6Qj z_^Tf&de>i=~OcFUS38yN(V4n!M(HZBJP*};0 z20Y|ZN%b=F=4Kwp1VOcrWjTmUc=$dg^W^y5{W(ZnSQh$f@%=K)U(>W z^8$fnL0KCqC<~<*y#%Q8i&f;1pD`@ta%t-n9C5~?MiPHk4j&IFL$H$}n(uc;jY?r& z!|qeCc8U!hu5H7#zx5cR#?5rH2Vq3vC%aI(7{6K0g;uA%wH`jpAV=aKp<;YipGfjv z9~@=w?3IOvIUM_|b{#%jzasLPiu~{+7oa|Sx}`*H9GypYSH`9`A<=a z%b{ez?k#qpLAw}r?<0`EJDBR77$=*_0r<^wbbEFJ~mkN zLE-i}sNSj!dpiViq5nQ8PaLY!C}}DPf15n zj|1_nVAv^Qxc2~teiDE_`3d|Zk1j%uFb{RT9?_ubRveqY3`+joW7MZ^;Q1P_h1IQ7 z`8@@SynjOr$&arqY2TuGq%`mZN{K3PnWtO`*|d~gmvsYk7^CNY8iQomOWJy4Ht2Eh zTAI;)w$C|~KHQbWe&twCZ}p8h-O~unbLhHLWDmOej)G5hNud@hwt?RZ1AA}6+BkLGMlFw4T~*p+awLIzIw zec^X42FD9a+|0!TJLE9iyaShQ5#rkf^}=lTH?U{0f-0F;!mXkvY$#a? 
z0zO>-8!``5-fm^A;>*Yc&ilzfW`a{6=0gAP6s*?>K+R`mFlU@%U5X#uGf0kAmEgD9jS%oTkqO*d3_{Xh znMbl`u}We-+?@9vt(3-z!KL}A9dnaD?Uv+Cw(lkO*GeEs_z4K9wXpU%A+Y6d4PM!2 zK=PND;KyxRG{`XzPPB+X#!({(tG&+5HW$R0+p5r*FV2hN#c-_UajfNP;q;;9^r3ty z=2`~OD$kWv>cs^9hi%RX_a2(+E{>tklsq9iPJ*7S@WhqJId=7RFFO!3 z;Zk-Btr_@*ztrXN$LT7({5FY3EkA}iZzsbG``5JPmlQB*VNf+{08;OZ@ZO4IHY944 z_%j7KJ|P5aNiI!2B*K^D*WnMw81C#M;C*W*sJ(m0)W#k{op)DZn(ZE9=nzlqU9G6b z_IUEFe=1$IZZ$MiM}WZ|QQBAfheXWO#s?AxxNx|PG_>cUlXej#to@FQKlBjkwnUm3 zitsXtYxe6)tmjwH>Uo)ABh8>Ghlda{s6STC1%?b!LoYYkPw5cLJ3~*6?+LKCqidm~xtqH)ReaBmox7*Fe>-Sj_diS9>DJi|7>Q!6hz3HqB+2ZgW@S?VK`>y@5)^ zDLj`IHh2beMT;?kbin%&0UX{CgHPs1!FGoe*fLLw_dClRdInA-$rgjX`O3`hZW(-& z&{(Ty#5uBWe1-GcoVVptI@87P#i#{an2>*8i1o?CWPbO4{N}F;Z?q;8t?DK)?or^S zUEN7zXS@WVI062rW{z=Rvk^2-Br?9F9V1LrP~^ujeN(-hF8OnnER9V^Tvq@aLj`$v zA|vqHMGchl93zHp%h(lP4q;=XGs=x##8g35Z27|NxdU23kaL5)4nIlnK8b)9&esw6 z;{|RQ-awyLjT4=v3t_g-VX!sz#l7E8f--v@wYR^;)w!_{;UtZJeA22o?TcWotp9~Z}xchw{V&cl46JYt_xV7B1-CXyq) zjNR-i%zIljN*aIlP+wtT`p0P(P8;lGx8*pYt6~G**mw?vP2@0sZw=0qD8j2tYl!j0 z8DO{IJed0LV`Oeypw@^IUpFM)GGlKwcuVfD`&O9(1BPa>;)XkYXK;z$uyV$-%2S;G zPLe9^5ka%*0*v5m1%5~Idos!EJc@lPpo=bWw?qbI(D(?V?02MdPntp9&@lNa?#Swe z?7}OR*|bH}nyu9>1r-NLZ10xCo~M)O(YD`~Zh9(~55H)T$v*eEEJrm>dfkHSHIwjr z=xm<5yex0knr=EC&8Gof;!xCPPe9@$`Sq!dyC33?R*9T8Jn$WU)E30M?X~Qh126L@9?51{9#u}wzbf+ee|B@f zRRYlQU4`ylyN1eV)PvxUbSUckN{aoOV0F+4y|=3s^OJXh$y9yZw0XdCq0A|Gv1cW| zDy+wZTiN*1!32H@%|Jn&T4Jhm57r;_gRD?Hn4MgLefdFThvx}=T5tyLR@-A*2;u21 zw}T*=?ToJ16kxueBYTI0`P1^xg08?#;-T=O?z3+w9R6?_81@|%(lq6L+Hjuk*|dQ4 znUvCI?fnq_Z9S7`ItL9Sqwvk@2$-_^0UNkxCkSlk{N?UZ^tyi#na6QP)-F4-Gd~?0 z?YVit{Ya{RSrTMQPk_FkA>HKSL42h$vFHo;OmKhizL;jvv%C#CbFINxIgxmv1$ISU zt_v0AJn-Jan6f?>HFn$3j^*@N5!%2sGc9!tD{ZWUb zFzql@;q7=3j91s6hr+-Ss+pPruFX#%^lKblZgdI%EW8Lgn{w%gx%yP*i8IR7R+H2V zvivB&H$--G0m>TvWUq4>Q^P~~Y~a2+HutwMS*+!R)l*vFyj27++|2=zs$SyDWtEJ$ ze2cQ)UTWuT503=-bcs*^O&NTQ6W1!yq=}bs{;&P`M~~xOS1xDcx2(Yq?m64IxCw0E z4#6pg`yDFp1UH!>w4B1_LmjJ$W_uPmNYzu%&aLR$B*^MlhMLb1+lM*#&*1#-X{gtf zPPXmOpchu}!yTzlP;JdQwogYC?`>TJRR!xYRa}uMZvV~3Kf8toNxAT}!<~J+?Ieu6 zaiXiZe6N7TZja6;$B8(u9!nc% zs4!M*MbM!B3AwQ}mK-m?0OR)uV8Zu!_J?E;dh{%zTK%T%Q(5k2RLVwDSw9e9wA7l>!5l>&zhzF_k1}sxA}Ud=&Jv19A4e9iZ!b z4I`fflQ^mGD3aff1BDaOIQBS9_Ph^n%wrn-trT|qY=ekBESOrPU_!@HY>rrqM(2;w z1v{?d!;eX*ZLnRV1rz}L}v!Zz8xDI+xH^m662gJa?jm)@L4S|zmpeE)bOnaYzw??*; z5&=D0;cSZM_4`nBuL}H4yiUABH={z7AiA!zC-0u`MytPz@MO+Yx`%lR!v;TT>fVXC zR^budkn0YOowB^rv++mbLuk3lQfA9XChL@lP;f}F@&y!p8dUDAv&??(U)b8Uip z!6!9Jo@U?_`H#+^!OeIr;T#(w{#Bm_)-ZQ zHXGolO_8nQj^(_gwWC3kK%5ZPhKT^}#i_fdCfZYCD5Ja7!yKg4OhE)@zu_Y*G zz8wXd8u0hlDAFItxfZsVLcSb@0+U53Rmk1@so=QJ*T*0>^E9R??FC`Q3GkV@O_|JR zsI=e*#OiOvc@f;R(X)(g{9{G~RCC}YTS@DeUnCC47Vt+Z|FZdU$5B1RfU4BLrn_YQ z+0tjH@aX;5#OC=8+{aXq-Wadi_)Ly)qM_7k5W(- ztx87s=3-BdC%qqDOqb5?V9$AUVbi}?sCY{ax7c2S|LTu03LjRX&XZyoFHOM9f|{u3 zpaOk6x8U_oL>JdIqV%tlIli_OV6HOBCRHTj<~@32z=^lie}HCN&BxyKE9jbdmE^wP zjx$zFsC(Vwfo^fyKn==hLz)^KE(oT3f-RYs8ZNw>8B#DWt`4d>N2u{Xb*6N95v4)VnlX2ROGJIk6 z2n(KkgGs|;5EW#~{+O_o?ek7RkGwecq1%73t#v&fWQBQ=H$SqIS4<>}E#Bgp+wD|- zNf{eD

[... base85-encoded binary payload omitted ...]

literal 0
HcmV?d00001

diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_9_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_9_3_3_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ba6b4bdbbd4863a9f3b0a67c834396746150ed07
GIT binary patch
literal 972
[... base85-encoded data omitted ...]

literal 0
HcmV?d00001

diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_9_3_3_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/deconv/deconvfp32_dw_d2_9_3_3_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9e542c89eda71d1ab8d2c4e3b67c76546243b4df
GIT binary patch
literal 972
[... base85-encoded data omitted ...]