Merge pull request !4261 from yangruoqi713/test_dwtags/v0.7.0-beta
| @@ -40,7 +40,6 @@ | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/reduce.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/activation.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/arithmetic.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fused_batchnorm.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" | |||
| #include "src/runtime/kernel/arm/nnacl/power.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/range.h" | |||
| @@ -510,15 +509,15 @@ OpParameter *PopulateActivationParameter(const lite::Primitive *primitive) { | |||
| } | |||
| OpParameter *PopulateFusedBatchNorm(const lite::Primitive *primitive) { | |||
| FusedBatchNormParameter *fuse_batch_norm_param = new (std::nothrow) FusedBatchNormParameter(); | |||
| if (fuse_batch_norm_param == nullptr) { | |||
| BatchNormParameter *batch_norm_param = new (std::nothrow) BatchNormParameter(); | |||
| if (batch_norm_param == nullptr) { | |||
| MS_LOG(ERROR) << "new FusedBatchNormParameter failed."; | |||
| return nullptr; | |||
| } | |||
| fuse_batch_norm_param->op_parameter_.type_ = primitive->Type(); | |||
| batch_norm_param->op_parameter_.type_ = primitive->Type(); | |||
| auto param = primitive->Value()->value_as_FusedBatchNorm(); | |||
| fuse_batch_norm_param->epsilon_ = param->epsilon(); | |||
| return reinterpret_cast<OpParameter *>(fuse_batch_norm_param); | |||
| batch_norm_param->epsilon_ = param->epsilon(); | |||
| return reinterpret_cast<OpParameter *>(batch_norm_param); | |||
| } | |||
| OpParameter *PopulateArithmetic(const lite::Primitive *primitive) { | |||
| @@ -28,6 +28,22 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed weight/input/output buffers. | |||
| ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { | |||
| delete sliding_; | |||
| // The packed buffers are C allocations (InitBuffer is annotated "malloc pack input buffer"), so they must be | |||
| // released with free(); handing malloc'ed memory to `delete` is undefined behaviour. | |||
| // NOTE(review): packed_weight_ and packed_output_ are assumed to be malloc'ed as well - confirm. | |||
| // free(nullptr) is a no-op, so no explicit null checks are needed. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::InitBuffer() { | |||
| // malloc pack input buffer | |||
| int C8 = UP_DIV(conv_param_->input_channel_, C8NUM); | |||
| @@ -113,8 +129,14 @@ int ConvolutionDepthwiseFp16CPUKernel::Init() { | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| InitSlidingParam(sliding_, conv_param_, C8NUM); | |||
| @@ -29,12 +29,7 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwiseFp16CPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| } | |||
| ~ConvolutionDepthwiseFp16CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -28,6 +28,22 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed weight/input/output buffers. | |||
| DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { | |||
| delete sliding_; | |||
| // Release with free() rather than `delete`: mixing malloc with `delete` is undefined behaviour. | |||
| // NOTE(review): the packed buffers are assumed to be malloc'ed (ReSize historically released them with free()) | |||
| // - confirm against InitWeightBias/InitBuffer. free(nullptr) is a no-op, so null checks are unnecessary. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { | |||
| conv_param_->input_batch_ = outputs_.front()->shape().at(kNHWC_N); | |||
| conv_param_->input_h_ = outputs_.front()->shape().at(kNHWC_H); | |||
| @@ -126,8 +142,14 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() { | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| InitSlideParam(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DeconvolutionDepthwiseFp16CPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| } | |||
| }; | |||
| ~DeconvolutionDepthwiseFp16CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -52,7 +45,6 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| float16_t *packed_weight_; | |||
| float16_t *packed_input_; | |||
| float16_t *packed_output_; | |||
| bool need_align_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -15,7 +15,6 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/batchnorm.h" | |||
| #include <cmath> | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -28,7 +27,42 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| // Releases the kernel-owned copies of the mean and variance data and clears both pointers. | |||
| BatchnormCPUKernel::~BatchnormCPUKernel() { | |||
| // free() ignores null pointers, so the buffers can be released unconditionally. | |||
| free(mean_addr_); | |||
| mean_addr_ = nullptr; | |||
| free(var_addr_); | |||
| var_addr_ = nullptr; | |||
| } | |||
| // Copies the data of inputs_[1] (mean) and inputs_[2] (variance) into freshly malloc'ed float buffers | |||
| // stored in mean_addr_ / var_addr_. Returns RET_ERROR when an allocation fails, RET_OK otherwise. | |||
| int BatchnormCPUKernel::InitConstTensor() { | |||
| auto mean_tensor = inputs_[1]; | |||
| const size_t mean_bytes = mean_tensor->ElementsNum() * sizeof(float); | |||
| mean_addr_ = reinterpret_cast<float *>(malloc(mean_bytes)); | |||
| if (mean_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(mean_addr_, mean_tensor->Data(), mean_bytes); | |||
| auto var_tensor = inputs_[2]; | |||
| const size_t var_bytes = var_tensor->ElementsNum() * sizeof(float); | |||
| var_addr_ = reinterpret_cast<float *>(malloc(var_bytes)); | |||
| if (var_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(var_addr_, var_tensor->Data(), var_bytes); | |||
| return RET_OK; | |||
| } | |||
| int BatchnormCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| auto input_shapes = inputs_[0]->shape(); | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| @@ -37,11 +71,24 @@ int BatchnormCPUKernel::Init() { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->unit_); | |||
| MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); | |||
| auto ret = InitConstTensor(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Batchnorm fp32 InitConstTensor failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int BatchnormCPUKernel::ReSize() { return RET_OK; } | |||
| // Recomputes batchnorm_param_->unit_ as the product of every dimension of inputs_[0] except the last one. | |||
| // Always returns RET_OK. | |||
| int BatchnormCPUKernel::ReSize() { | |||
| auto input_shapes = inputs_[0]->shape(); | |||
| batchnorm_param_->unit_ = 1; | |||
| // Written as `i + 1 < size()` with an unsigned index: `size() - 1` would wrap around to SIZE_MAX for an | |||
| // empty shape and walk far past the end of the vector. | |||
| for (size_t i = 0; i + 1 < input_shapes.size(); i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int BatchnormCPUKernel::DoExecute(int task_id) { | |||
| BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); | |||
| @@ -61,12 +108,10 @@ int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| int BatchnormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| mean_addr_ = reinterpret_cast<float *>(inputs_.at(1)->Data()); | |||
| var_addr_ = reinterpret_cast<float *>(inputs_.at(2)->Data()); | |||
| out_addr_ = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| int ret = LiteBackendParallelLaunch(BatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); | |||
| @@ -31,14 +31,14 @@ class BatchnormCPUKernel : public LiteKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| opParameter->thread_num_ = ctx->thread_num_; | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~BatchnormCPUKernel() override = default; | |||
| ~BatchnormCPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor(); | |||
| int DoExecute(int tid); | |||
| private: | |||
| @@ -29,6 +29,24 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed buffers it owns. | |||
| ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { | |||
| delete sliding_; | |||
| // Release with free() rather than `delete`: mixing malloc with `delete` is undefined behaviour. | |||
| // NOTE(review): the packed buffers are assumed to be malloc'ed (ReSize historically released them with free()) | |||
| // - confirm against InitWeightBias/InitBuffer. free(nullptr) is a no-op, so null checks are unnecessary. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| // The input/output staging buffers are only released when need_align_ is set. | |||
| // NOTE(review): presumably they are not owned by the kernel otherwise - confirm. | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::InitWeightBias() { | |||
| // init weight: o, h, w, i; o == group, i == 1 | |||
| auto weight_tensor = inputs_[kWeightIndex]; | |||
| @@ -114,9 +132,16 @@ int ConvolutionDepthwiseCPUKernel::Init() { | |||
| int ConvolutionDepthwiseCPUKernel::ReSize() { | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -197,10 +222,11 @@ kernel::LiteKernel *CpuConvDwFp32KernelCreator(const std::vector<lite::tensor::T | |||
| kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| // auto param = reinterpret_cast<ConvParameter *>(opParameter); | |||
| // if (param->kernel_h_ == 3 && param->kernel_w_ == 3 && param->stride_h_ == 1 && param->stride_w_ == 1 && | |||
| // param->dilation_h_ == 1 && param->dilation_w_ == 1) { | |||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx); | |||
| // param->dilation_h_ == 1 && param->dilation_w_ == 1) { | |||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwise3x3CPUKernel(opParameter, inputs, outputs, ctx, | |||
| // primitive); | |||
| // } else { | |||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx); | |||
| // kernel = new (std::nothrow) kernel::ConvolutionDepthwiseCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| // } | |||
| if (kernel == nullptr) { | |||
| @@ -29,14 +29,7 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwiseCPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| } | |||
| }; | |||
| ~ConvolutionDepthwiseCPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -27,6 +27,24 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed buffers it owns. | |||
| DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { | |||
| delete sliding_; | |||
| // Release with free() rather than `delete`: mixing malloc with `delete` is undefined behaviour. | |||
| // NOTE(review): the packed buffers are assumed to be malloc'ed (ReSize historically released them with free()) | |||
| // - confirm against InitWeightBias/InitBuffer. free(nullptr) is a no-op, so null checks are unnecessary. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| // The input/output staging buffers are only released when need_align_ is set. | |||
| // NOTE(review): presumably they are not owned by the kernel otherwise - confirm. | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { | |||
| conv_param_->input_batch_ = outputs_.front()->shape().at(kNHWC_N); | |||
| conv_param_->input_h_ = outputs_.front()->shape().at(kNHWC_H); | |||
| @@ -126,8 +144,14 @@ int DeconvolutionDepthwiseCPUKernel::Init() { | |||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| InitSlideParam(); | |||
| @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DeconvolutionDepthwiseCPUKernel() override { | |||
| delete sliding_; | |||
| free(packed_weight_); | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| } | |||
| }; | |||
| ~DeconvolutionDepthwiseCPUKernel() override; | |||
| int Init() override; | |||
| int InitSlideParam(); | |||
| @@ -32,6 +32,12 @@ int FlattenCPUKernel::Init() { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| ReSize(); | |||
| return RET_OK; | |||
| } | |||
| int FlattenCPUKernel::ReSize() { | |||
| auto output_shape = outputs_[0]->shape(); | |||
| flatten_param_->size = sizeof(float); | |||
| for (int i = 0; i < output_shape.size(); i++) { | |||
| @@ -40,8 +46,6 @@ int FlattenCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int FlattenCPUKernel::ReSize() { return RET_OK; } | |||
| int FlattenCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| @@ -15,10 +15,10 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/fused_batchnorm.h" | |||
| #include <cmath> | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| @@ -27,33 +27,121 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| // Releases the kernel-owned scale, offset, mean and variance buffers and clears each pointer. | |||
| FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() { | |||
| // free() ignores null pointers, so the buffers can be released unconditionally. | |||
| free(scale_addr_); | |||
| scale_addr_ = nullptr; | |||
| free(offset_addr_); | |||
| offset_addr_ = nullptr; | |||
| free(mean_addr_); | |||
| mean_addr_ = nullptr; | |||
| free(var_addr_); | |||
| var_addr_ = nullptr; | |||
| } | |||
| // Copies the data of inputs_[1..4] (scale, offset, mean, variance) into freshly malloc'ed float buffers | |||
| // stored in scale_addr_, offset_addr_, mean_addr_ and var_addr_. | |||
| // Returns RET_ERROR as soon as one allocation fails, RET_OK otherwise. | |||
| int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| auto scale_tensor = inputs_[1]; | |||
| const size_t scale_bytes = scale_tensor->ElementsNum() * sizeof(float); | |||
| scale_addr_ = reinterpret_cast<float *>(malloc(scale_bytes)); | |||
| if (scale_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(scale_addr_, scale_tensor->Data(), scale_bytes); | |||
| auto offset_tensor = inputs_[2]; | |||
| const size_t offset_bytes = offset_tensor->ElementsNum() * sizeof(float); | |||
| offset_addr_ = reinterpret_cast<float *>(malloc(offset_bytes)); | |||
| if (offset_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(offset_addr_, offset_tensor->Data(), offset_bytes); | |||
| auto mean_tensor = inputs_[3]; | |||
| const size_t mean_bytes = mean_tensor->ElementsNum() * sizeof(float); | |||
| mean_addr_ = reinterpret_cast<float *>(malloc(mean_bytes)); | |||
| if (mean_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(mean_addr_, mean_tensor->Data(), mean_bytes); | |||
| auto var_tensor = inputs_[4]; | |||
| const size_t var_bytes = var_tensor->ElementsNum() * sizeof(float); | |||
| var_addr_ = reinterpret_cast<float *>(malloc(var_bytes)); | |||
| if (var_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(var_addr_, var_tensor->Data(), var_bytes); | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| SetNeedReInit(); | |||
| return RET_OK; | |||
| } | |||
| input_shape_ = reinterpret_cast<int *>(malloc(sizeof(int) * inputs_[0]->shape().size())); | |||
| memcpy(input_shape_, inputs_[0]->shape().data(), inputs_[0]->shape().size() * sizeof(int)); | |||
| auto input_shapes = inputs_[0]->shape(); | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->unit_ = 1; | |||
| for (int i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); | |||
| auto ret = InitConstTensor(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "FusedBatchnorm fp32 InitConstTensor failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| // Recomputes batchnorm_param_->unit_ as the product of every dimension of inputs_[0] except the last one. | |||
| // Always returns RET_OK. | |||
| int FusedBatchnormCPUKernel::ReSize() { | |||
| auto input_shapes = inputs_[0]->shape(); | |||
| batchnorm_param_->unit_ = 1; | |||
| // Written as `i + 1 < size()` with an unsigned index: `size() - 1` would wrap around to SIZE_MAX for an | |||
| // empty shape and walk far past the end of the vector. | |||
| for (size_t i = 0; i + 1 < input_shapes.size(); i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| // Forwards the cached output/input pointers, the scale/offset/mean/variance buffers, task_id and | |||
| // batchnorm_param_ to FusedBatchNorm. Always returns RET_OK. | |||
| int FusedBatchnormCPUKernel::Execute(int task_id) { | |||
| FusedBatchNorm(out_addr_, in_addr_, scale_addr_, offset_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::ReSize() { return RET_OK; } | |||
| // Parallel-launch callback: treats cdata as a FusedBatchnormCPUKernel and runs its Execute() for task_id. | |||
| // Returns RET_OK on success; otherwise logs the failure and returns Execute()'s error code. | |||
| int FusedBatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto *fused_kernel = static_cast<FusedBatchnormCPUKernel *>(cdata); | |||
| const int status = fused_kernel->Execute(task_id); | |||
| if (status == RET_OK) { | |||
| return RET_OK; | |||
| } | |||
| MS_LOG(ERROR) << "FusedBatchnormRun error task_id[" << task_id << "] error_code[" << status << "]"; | |||
| return status; | |||
| } | |||
| int FusedBatchnormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| auto input_addr = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| auto scale_addr = reinterpret_cast<float *>(inputs_.at(1)->Data()); | |||
| auto offest_addr = reinterpret_cast<float *>(inputs_.at(2)->Data()); | |||
| auto mean_addr = reinterpret_cast<float *>(inputs_.at(3)->Data()); | |||
| auto variance_addr = reinterpret_cast<float *>(inputs_.at(4)->Data()); | |||
| auto output_addr = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| in_addr_ = reinterpret_cast<float *>(inputs_.at(0)->Data()); | |||
| out_addr_ = reinterpret_cast<float *>(outputs_.at(0)->Data()); | |||
| FusedBatchNorm(input_addr, scale_addr, offest_addr, mean_addr, variance_addr, input_shape_, | |||
| fused_batchnorm_param_->epsilon_, output_addr); | |||
| int ret = LiteBackendParallelLaunch(FusedBatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "FusedBatchnormRun error error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -63,8 +151,8 @@ kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tenso | |||
| const kernel::KernelKey &desc, const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_FusedBatchNorm); | |||
| FusedBatchnormCPUKernel *kernel = new (std::nothrow) FusedBatchnormCPUKernel(opParameter, inputs, outputs, ctx, | |||
| primitive); | |||
| FusedBatchnormCPUKernel *kernel = | |||
| new (std::nothrow) FusedBatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new FusedBatchnormCPUKernel fail!"; | |||
| return nullptr; | |||
| @@ -19,7 +19,7 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fused_batchnorm.h" | |||
| #include "src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" | |||
| namespace mindspore::kernel { | |||
| class FusedBatchnormCPUKernel : public LiteKernel { | |||
| @@ -28,17 +28,26 @@ class FusedBatchnormCPUKernel : public LiteKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| fused_batchnorm_param_ = reinterpret_cast<FusedBatchNormParameter *>(parameter); | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~FusedBatchnormCPUKernel() override { delete fused_batchnorm_param_; } | |||
| ~FusedBatchnormCPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor(); | |||
| int Execute(int task_id); | |||
| private: | |||
| int *input_shape_{}; | |||
| FusedBatchNormParameter *fused_batchnorm_param_; | |||
| float *in_addr_; | |||
| float *mean_addr_; | |||
| float *var_addr_; | |||
| float *scale_addr_; | |||
| float *offset_addr_; | |||
| float *out_addr_; | |||
| BatchNormParameter *batchnorm_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -17,7 +17,6 @@ | |||
| #include "src/runtime/kernel/arm/fp32/scale.h" | |||
| #include <string.h> | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/nnacl/scale.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| @@ -29,23 +28,29 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_Scale; | |||
| namespace mindspore::kernel { | |||
| // Destructor: delegates all cleanup to FreeTmpBuffer(). | |||
| ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); } | |||
| void ScaleCPUKernel::FreeTmpBuffer() { | |||
| if (scale_ != nullptr) { | |||
| free(scale_); | |||
| scale_ = nullptr; | |||
| if (scale_param_->const_scale_) { | |||
| if (scale_ != nullptr) { | |||
| free(scale_); | |||
| scale_ = nullptr; | |||
| } | |||
| } | |||
| if (offset_ != nullptr) { | |||
| free(offset_); | |||
| offset_ = nullptr; | |||
| if (scale_param_->has_offset_) { | |||
| if (offset_ != nullptr) { | |||
| free(offset_); | |||
| offset_ = nullptr; | |||
| } | |||
| } | |||
| } | |||
| int ScaleCPUKernel::InitScaleOffset() { | |||
| FreeTmpBuffer(); | |||
| auto param = reinterpret_cast<ScaleParameter *>(opParameter); | |||
| auto scale_tensor = inputs_.at(1); | |||
| float *scale_ptr = reinterpret_cast<float *>(inputs_.at(1)->Data()); | |||
| if (scale_ptr != nullptr) { | |||
| scale_param_->const_scale_ = true; | |||
| scale_ = reinterpret_cast<float *>(malloc(scale_tensor->ElementsNum() * sizeof(float))); | |||
| if (scale_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| @@ -53,6 +58,7 @@ int ScaleCPUKernel::InitScaleOffset() { | |||
| } | |||
| memcpy(scale_, scale_ptr, scale_tensor->ElementsNum() * sizeof(float)); | |||
| } else { | |||
| scale_param_->const_scale_ = false; | |||
| scale_ = nullptr; | |||
| } | |||
| @@ -64,40 +70,39 @@ int ScaleCPUKernel::InitScaleOffset() { | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(offset_, offset_tensor->Data(), offset_tensor->ElementsNum() * sizeof(float)); | |||
| param->has_offset_ = true; | |||
| scale_param_->has_offset_ = true; | |||
| } else { | |||
| offset_ = nullptr; | |||
| param->has_offset_ = false; | |||
| scale_param_->has_offset_ = false; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ScaleCPUKernel::InitParameter() { | |||
| auto param = reinterpret_cast<ScaleParameter *>(opParameter); | |||
| auto in_tensor = inputs_.at(0); | |||
| auto in_shape = in_tensor->shape(); | |||
| auto scale_tensor = inputs_.at(1); | |||
| auto scale_shape = scale_tensor->shape(); | |||
| if (scale_shape.size() + param->axis_ > in_shape.size()) { | |||
| if (scale_shape.size() + scale_param_->axis_ > in_shape.size()) { | |||
| MS_LOG(ERROR) << "Scale tensor shape is incorrect."; | |||
| return RET_ERROR; | |||
| } | |||
| param->outer_size_ = 1; | |||
| param->axis_size_ = 1; | |||
| param->inner_size_ = 1; | |||
| for (int i = 0; i < param->axis_; i++) { | |||
| param->outer_size_ *= in_shape[i]; | |||
| scale_param_->outer_size_ = 1; | |||
| scale_param_->axis_size_ = 1; | |||
| scale_param_->inner_size_ = 1; | |||
| for (int i = 0; i < scale_param_->axis_; i++) { | |||
| scale_param_->outer_size_ *= in_shape[i]; | |||
| } | |||
| for (int i = 0; i < scale_shape.size(); i++) { | |||
| if (in_shape[i + param->axis_] != scale_shape[i]) { | |||
| if (in_shape[i + scale_param_->axis_] != scale_shape[i]) { | |||
| MS_LOG(ERROR) << "Scale tensor shape is incorrect."; | |||
| return RET_ERROR; | |||
| } | |||
| param->axis_size_ *= in_shape[i + param->axis_]; | |||
| scale_param_->axis_size_ *= in_shape[i + scale_param_->axis_]; | |||
| } | |||
| for (int i = param->axis_ + scale_shape.size(); i < in_shape.size(); i++) { | |||
| param->inner_size_ *= in_shape[i]; | |||
| for (int i = scale_param_->axis_ + scale_shape.size(); i < in_shape.size(); i++) { | |||
| scale_param_->inner_size_ *= in_shape[i]; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -130,9 +135,7 @@ int ScaleCPUKernel::ReSize() { | |||
| } | |||
| int ScaleCPUKernel::Scale(int task_id) { | |||
| auto ret = | |||
| DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, reinterpret_cast<ScaleParameter *>(opParameter)); | |||
| auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Scale error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| @@ -19,6 +19,7 @@ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "src/runtime/kernel/arm/nnacl/scale.h" | |||
| namespace mindspore::kernel { | |||
| @@ -27,10 +28,10 @@ class ScaleCPUKernel : public LiteKernel { | |||
| ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ScaleCPUKernel() { | |||
| FreeTmpBuffer(); | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| scale_param_ = reinterpret_cast<ScaleParameter *>(opParameter); | |||
| } | |||
| ~ScaleCPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -45,6 +46,7 @@ class ScaleCPUKernel : public LiteKernel { | |||
| float *scale_; | |||
| float *offset_; | |||
| float *output_ptr_; | |||
| ScaleParameter *scale_param_; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -28,6 +28,24 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed buffers it owns. | |||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||
| delete sliding; | |||
| // Release with free() rather than `delete`: mixing malloc with `delete` is undefined behaviour. | |||
| // NOTE(review): the packed buffers are assumed to be malloc'ed (ReSize historically released them with free()) | |||
| // - confirm against InitWeightBias/InitBuffer. free(nullptr) is a no-op, so null checks are unnecessary. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| // The output staging buffer is only released when need_align_ is set. | |||
| // NOTE(review): presumably it is not owned by the kernel otherwise - confirm. | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||
| // init weight, int8 -> int16 | |||
| // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | |||
| @@ -111,10 +129,17 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| free(packed_input_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -29,14 +29,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwiseInt8CPUKernel() override { | |||
| delete sliding; | |||
| free(packed_weight_); | |||
| free(packed_input_); | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| } | |||
| }; | |||
| ~ConvolutionDepthwiseInt8CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -28,6 +28,28 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| // Destroys the kernel: deletes the sliding-window parameter and releases the packed buffers and the | |||
| // temporary int32 output buffer. | |||
| DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { | |||
| delete sliding; | |||
| // Release with free() rather than `delete`: output_buffer_ is malloc'ed in InitBuffer, and mixing malloc | |||
| // with `delete` is undefined behaviour. | |||
| // NOTE(review): the packed_* buffers are assumed to be malloc'ed too (ReSize historically released them | |||
| // with free()) - confirm. free(nullptr) is a no-op, so null checks are unnecessary. | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| // The output staging buffer is only released when need_align_ is set. | |||
| // NOTE(review): presumably it is not owned by the kernel otherwise - confirm. | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| free(output_buffer_); | |||
| output_buffer_ = nullptr; | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||
| // init weight: int8 -> int16 | |||
| // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | |||
| @@ -101,9 +123,9 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||
| } | |||
| // malloc tmp buffer for int32 output | |||
| output_buffer = | |||
| output_buffer_ = | |||
| reinterpret_cast<int32_t *>(malloc(conv_param_->output_h_ * conv_param_->output_w_ * C4NUM * sizeof(int32_t))); | |||
| if (output_buffer == nullptr) { | |||
| if (output_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -144,10 +166,21 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| free(packed_input_); | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| if (output_buffer_ != nullptr) { | |||
| delete output_buffer_; | |||
| output_buffer_ = nullptr; | |||
| } | |||
| InitSlideParam(); | |||
| // conv base init | |||
| @@ -162,7 +195,7 @@ int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | |||
| DeconvDwInt8(packed_output_, output_buffer, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | |||
| DeconvDwInt8(packed_output_, output_buffer_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | |||
| conv_param_, sliding, task_id); | |||
| return RET_OK; | |||
| } | |||
| @@ -29,14 +29,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~DeconvolutionDepthwiseInt8CPUKernel() override { | |||
| delete sliding; | |||
| free(packed_weight_); | |||
| free(packed_input_); | |||
| if (need_align_) { | |||
| free(packed_output_); | |||
| } | |||
| }; | |||
| ~DeconvolutionDepthwiseInt8CPUKernel() override; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -52,7 +45,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int16_t *packed_weight_; | |||
| int16_t *packed_input_; | |||
| int8_t *packed_output_; | |||
| int32_t *output_buffer; | |||
| int32_t *output_buffer_; | |||
| bool need_align_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -24,4 +24,3 @@ typedef struct FlattenParameter { | |||
| void Flatten(const void *input, void *output, FlattenParameter *flatten_param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FLATTEN_H_ | |||
| @@ -19,10 +19,21 @@ | |||
| void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id, | |||
| BatchNormParameter *param) { | |||
| for (int u = task_id; u < param->unit_; u += param->op_parameter_.thread_num_) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); | |||
| for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { | |||
| auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); | |||
| for (int u = 0; u < param->unit_; u++) { | |||
| output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt; | |||
| } | |||
| } | |||
| } | |||
| void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, | |||
| const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) { | |||
| for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { | |||
| auto variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); | |||
| for (int u = 0; u < param->unit_; u++) { | |||
| output_ptr[u * param->channel_ + c] = | |||
| (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c]; | |||
| } | |||
| } | |||
| } | |||
| @@ -29,4 +29,7 @@ typedef struct BatchNormParameter { | |||
| void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id, | |||
| BatchNormParameter *param); | |||
| void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, | |||
| const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ | |||
| @@ -486,6 +486,21 @@ void ConvDw3x3Fp32OutputUnit(float *src_buf, float *dst_output, const float *bia | |||
| float32x4_t d10 = vaddq_f32(vaddq_f32(vaddq_f32(t10, t11), t12), bias_ptr); | |||
| float32x4_t d11 = vaddq_f32(vsubq_f32(vsubq_f32(t11, t12), t13), bias_ptr); | |||
| float32x4_t zeros = {0, 0, 0, 0}; | |||
| float32x4_t bounds = {6, 6, 6, 6}; | |||
| if (is_relu) { | |||
| d00 = vmaxq_f32(d00, zeros); | |||
| d01 = vmaxq_f32(d01, zeros); | |||
| d10 = vmaxq_f32(d10, zeros); | |||
| d11 = vmaxq_f32(d11, zeros); | |||
| } | |||
| if (is_relu6) { | |||
| d00 = vminq_f32(vmaxq_f32(d00, zeros), bounds); | |||
| d01 = vminq_f32(vmaxq_f32(d01, zeros), bounds); | |||
| d10 = vminq_f32(vmaxq_f32(d10, zeros), bounds); | |||
| d11 = vminq_f32(vmaxq_f32(d11, zeros), bounds); | |||
| } | |||
| vst1q_f32(dst_output, d00); | |||
| if (w_in_range) { | |||
| vst1q_f32(dst_output + channel, d01); | |||
| @@ -536,6 +551,19 @@ void ConvDw3x3Fp32OutputUnit(float *src_buf, float *dst_output, const float *bia | |||
| float d10 = t10 + t11 + t12 + bias_ptr[0]; | |||
| float d11 = t11 - t12 - t13 + bias_ptr[0]; | |||
| if (is_relu) { | |||
| d00 = MSMAX(d00, 0); | |||
| d01 = MSMAX(d01, 0); | |||
| d10 = MSMAX(d10, 0); | |||
| d11 = MSMAX(d11, 0); | |||
| } | |||
| if (is_relu6) { | |||
| d00 = MSMIN(MSMAX(d00, 0), 6); | |||
| d01 = MSMIN(MSMAX(d01, 0), 6); | |||
| d10 = MSMIN(MSMAX(d10, 0), 6); | |||
| d11 = MSMIN(MSMAX(d11, 0), 6); | |||
| } | |||
| (dst_output + i)[0] = d00; | |||
| if (w_in_range) { | |||
| (dst_output + i + channel)[0] = d01; | |||
| @@ -1,35 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fused_batchnorm.h" | |||
| #include <math.h> | |||
// Fused batch normalisation over a 4-D input whose last dimension is the channel:
//   output = (input - mean[c]) / sqrt(variance[c] + epsilon) * scale[c] + offset[c]
//
// input_shapes must hold four extents; input_shapes[3] is the channel count and
// the product of the first three is the number of units per channel. Elements
// are addressed as [unit * channel + c].
void FusedBatchNorm(const float *input_ptr, const float *scale_ptr, const float *offest_ptr, const float *mean_ptr,
                    const float *variance_ptr, int *input_shapes, float epsilon, float *output_ptr) {
  const int channel_count = input_shapes[3];
  const int unit_count = input_shapes[0] * input_shapes[1] * input_shapes[2];
  for (int ch = 0; ch < channel_count; ++ch) {
    // Per-channel denominator, computed once and reused for every unit.
    auto std_dev = sqrt(variance_ptr[ch] + epsilon);
    for (int unit = 0; unit < unit_count; ++unit) {
      const int idx = unit * channel_count + ch;
      output_ptr[idx] = (input_ptr[idx] - mean_ptr[ch]) / std_dev * scale_ptr[ch] + offest_ptr[ch];
    }
  }
}
| @@ -1,32 +0,0 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ | |||
| #include "nnacl/op_base.h" | |||
| typedef struct FusedBatchNormParameter { | |||
| OpParameter op_parameter_; | |||
| float epsilon_; | |||
| } FusedBatchNormParameter; | |||
| void FusedBatchNorm(const float *input_ptr, const float *scale_ptr, const float *offest_ptr, const float *mean_ptr, | |||
| const float *variance_ptr, int *input_shapes, float epsilon, float *output_ptr); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FUSED_BATCHNORM_H_ | |||
| @@ -25,10 +25,9 @@ typedef struct ScaleParameter { | |||
| int axis_size_; | |||
| int inner_size_; | |||
| int axis_; | |||
| bool has_offset_; | |||
| // todo yangruoqi: axis | |||
| bool const_scale_ = false; | |||
| bool has_offset_ = false; | |||
| } ScaleParameter; | |||
| int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param); | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_SCALE_H_ | |||
| @@ -17,33 +17,20 @@ | |||
| #include "mindspore/core/utils/log_adapter.h" | |||
| #include "common/common_test.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/batchnorm.h" | |||
| #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fused_batchnorm.h" | |||
| #include "mindspore/lite/src/kernel_registry.h" | |||
| #include "mindspore/lite/src/lite_kernel.h" | |||
| #include "mindspore/lite/src/common/file_utils.h" | |||
| namespace mindspore { | |||
| class TestBatchnormFp32 : public mindspore::Common { | |||
| public: | |||
| TestBatchnormFp32() {} | |||
| }; | |||
| TEST_F(TestBatchnormFp32, BNTest) { | |||
| std::vector<float> in_data = {0.0669681, 0.959215, 0.252686, 0.613594, 0.811776, 0.139469, 0.322848, 0.118354, | |||
| 0.082978, 0.399467, 0.961267, 0.0247456, 0.0714259, 0.0791484, 0.0648625, 0.561612, | |||
| 0.412069, 0.311492, 0.46109, 0.377125, 0.369283, 0.0332446, 0.696142, 0.715973, | |||
| 0.525524, 0.477265, 0.0336351, 0.751577, 0.377548, 0.964603, 0.0196834, 0.174865}; | |||
| std::vector<float> in_data1 = {0.855446, 0.821765, 0.281008, 0.0798653, 0.22294, 0.793782, 0.963222, 0.17851, | |||
| 0.667549, 0.274381, 0.592842, 0.216552, 0.190274, 0.237873, 0.610063, 0.307559, | |||
| 0.830007, 0.760957, 0.583265, 0.763793, 0.456372, 0.391378, 0.547915, 0.862198, | |||
| 0.510794, 0.826776, 0.515894, 0.30071, 0.404987, 0.184773}; | |||
| std::vector<float> in_data2 = {0.712438, 0.4927, 0.078419, 0.310429, 0.546871, 0.0667141, 0.874321, 0.0265647, | |||
| 0.685165, 0.732586, 0.952889, 0.506402, 0.540784, 0.131119, 0.357713, 0.678992, | |||
| 0.960839, 0.340706, 0.697678, 0.398146, 0.313321, 0.6485, 0.739153, 0.00190134, | |||
| 0.536842, 0.996873, 0.445276, 0.371212, 0.420397, 0.0930115}; | |||
| std::vector<float> in_data3(32, 1); | |||
| std::vector<float> in_data4(32, 0); | |||
| std::vector<float> in_data = {-11.18675, 11.433986, 11.386012, 11.245945, -2.7614849, 14.692399, | |||
| -1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344}; | |||
| std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514}; | |||
| std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003}; | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||
| @@ -51,8 +38,7 @@ TEST_F(TestBatchnormFp32, BNTest) { | |||
| op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; | |||
| op_param.epsilon_ = 0.001f; | |||
| std::vector<int> in_shape = {1, 2, 4, 4}; | |||
| std::vector<int> shape = {1, 2, 2, 3}; | |||
| lite::tensor::Tensor input0_tensor; | |||
| lite::tensor::Tensor input1_tensor; | |||
| lite::tensor::Tensor input2_tensor; | |||
| @@ -62,39 +48,40 @@ TEST_F(TestBatchnormFp32, BNTest) { | |||
| input0_tensor.SetData(in_data.data()); | |||
| input1_tensor.SetData(in_data1.data()); | |||
| input2_tensor.SetData(in_data2.data()); | |||
| input0_tensor.set_shape(in_shape); | |||
| input0_tensor.set_shape(shape); | |||
| input1_tensor.set_shape({3}); | |||
| input2_tensor.set_shape({3}); | |||
| std::vector<float> output(32); | |||
| std::vector<float> corr_out(32); | |||
| std::vector<int> output_shape = {1, 2, 4, 4}; | |||
| std::vector<float> output(12); | |||
| std::vector<float> corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535, | |||
| -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324}; | |||
| lite::tensor::Tensor output0_tensor; | |||
| outputs_tensor.push_back(&output0_tensor); | |||
| output0_tensor.SetData(output.data()); | |||
| output0_tensor.set_shape(shape); | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| lite::Context ctx; | |||
| ctx.thread_num_ = 7; | |||
| ctx.thread_num_ = 1; | |||
| kernel::LiteKernel *kernel = | |||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| auto output_tensor_shape = output0_tensor.shape(); | |||
| kernel->Run(); | |||
| FusedBatchNorm(in_data.data(), in_data3.data(), in_data4.data(), in_data1.data(), in_data2.data(), in_shape.data(), | |||
| 0.001f, corr_out.data()); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < 1 * 28; i++) { | |||
| for (int i = 0; i < output0_tensor.ElementsNum(); i++) { | |||
| std::cout << output[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| CompareOutputData(output.data(), corr_out.data(), 32, 0.00001); | |||
| CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); | |||
| input0_tensor.SetData(nullptr); | |||
| input1_tensor.SetData(nullptr); | |||
| input2_tensor.SetData(nullptr); | |||
| output0_tensor.SetData(nullptr); | |||
| MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -1 +0,0 @@ | |||
| ýL[?-"R>‰qƒ>{B¸>´?yx?ó×_>JSD>Gº0? | |||
| @@ -1 +0,0 @@ | |||
| J[q? §P?¾ŸŒ>gý?õA?>oo?7G?x¸<¿”"? | |||
| @@ -1 +0,0 @@ | |||
| WÚU>X™8?*Á?!—v>›žF>0î?.ť<�C?Čd? | |||
| @@ -1 +0,0 @@ | |||
| ÜR?Ü]?žÎ>†c~?um?z1->í??Ø'?—U? | |||