Merge pull request !4961 from sunsuodong/batch_norm_fp16tags/v0.7.0-beta
| @@ -0,0 +1,52 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "nnacl/fp16/batchnorm_fp16.h" | |||
| #include <math.h> | |||
| void BatchNormFp16(const void *input, const void *mean, const void *variance, | |||
| BatchNormParameter *param, int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| int cur_offset = completed_units * param->channel_; | |||
| for (int i = 0; i < cur_unit; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_); | |||
| ((float16_t *)output)[cur_offset + c] = | |||
| (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt; | |||
| } | |||
| cur_offset += param->channel_; | |||
| } | |||
| } | |||
| void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| int cur_offset = completed_units * param->channel_; | |||
| for (int i = 0; i < cur_unit; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float16_t variance_sqrt = sqrt(((const float16_t *)variance)[c] + param->epsilon_); | |||
| float16_t norm_val = (((const float16_t *)input)[cur_offset + c] - ((const float16_t *)mean)[c]) / variance_sqrt; | |||
| ((float16_t *)output)[cur_offset + c] = norm_val * ((const float16_t *)scale)[c] + ((const float16_t *)offset)[c]; | |||
| } | |||
| cur_offset += param->channel_; | |||
| } | |||
| } | |||
| @@ -0,0 +1,37 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_ | |||
| #ifdef ENABLE_NEON | |||
| #include <arm_neon.h> | |||
| #endif | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void BatchNormFp16(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output); | |||
| void FusedBatchNormFp16(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_NNACL_FP16_BATCHNORM_FP16_H_ | |||
| @@ -15,26 +15,42 @@ | |||
| */ | |||
| #include "nnacl/fp32/batchnorm.h" | |||
| #include "nnacl/fp16/batchnorm_fp16.h" | |||
| #include <math.h> | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/errorcode.h" | |||
| void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id, | |||
| BatchNormParameter *param) { | |||
| for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { | |||
| float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); | |||
| for (int u = 0; u < param->unit_; u++) { | |||
| output_ptr[u * param->channel_ + c] = (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt; | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, | |||
| BatchNormParameter *param, int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| int cur_offset = completed_units * param->channel_; | |||
| for (int i = 0; i < cur_unit; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_); | |||
| ((float *)output)[cur_offset + c] = | |||
| (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt; | |||
| } | |||
| cur_offset += param->channel_; | |||
| } | |||
| } | |||
| void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, | |||
| const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param) { | |||
| for (int c = task_id; c < param->channel_; c += param->op_parameter_.thread_num_) { | |||
| float variance_sqrt = sqrt(variance_ptr[c] + param->epsilon_); | |||
| for (int u = 0; u < param->unit_; u++) { | |||
| output_ptr[u * param->channel_ + c] = | |||
| (input_ptr[u * param->channel_ + c] - mean_ptr[c]) / variance_sqrt * scale_ptr[c] + offest_ptr[c]; | |||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output) { | |||
| int units_per_thread = UP_DIV(param->unit_, param->op_parameter_.thread_num_); | |||
| int completed_units = task_id * units_per_thread; | |||
| int cur_unit = MSMIN(units_per_thread, param->unit_ - completed_units); | |||
| int cur_offset = completed_units * param->channel_; | |||
| for (int i = 0; i < cur_unit; i++) { | |||
| for (int c = 0; c < param->channel_; c++) { | |||
| float variance_sqrt = sqrt(((const float *)variance)[c] + param->epsilon_); | |||
| float norm_val = (((const float *)input)[cur_offset + c] - ((const float *)mean)[c]) / variance_sqrt; | |||
| ((float *)output)[cur_offset + c] = norm_val * ((const float *)scale)[c] + ((const float *)offset)[c]; | |||
| } | |||
| cur_offset += param->channel_; | |||
| } | |||
| } | |||
| @@ -17,18 +17,16 @@ | |||
| #ifndef MINDSPORE_LITE_NNACL_FP32_BATCHNORM_H_ | |||
| #define MINDSPORE_LITE_NNACL_FP32_BATCHNORM_H_ | |||
| #include "nnacl/op_base.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void BatchNorm(float *output_ptr, const float *input_ptr, const float *mean_ptr, const float *variance_ptr, int task_id, | |||
| BatchNormParameter *param); | |||
| void FusedBatchNorm(float *output_ptr, const float *input_ptr, const float *scale_ptr, const float *offest_ptr, | |||
| const float *mean_ptr, const float *variance_ptr, int task_id, BatchNormParameter *param); | |||
| void BatchNormFp32(const void *input, const void *mean, const void *variance, BatchNormParameter *param, int task_id, | |||
| void *output); | |||
| void FusedBatchNormFp32(const void *input, const void *scale, const void *offset, const void *mean, | |||
| const void *variance, BatchNormParameter *param, int task_id, void *output); | |||
| #ifdef __cplusplus | |||
| } | |||
| @@ -0,0 +1,87 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp16/batchnorm_fp16.h" | |||
| #include "nnacl/fp16/batchnorm_fp16.h" | |||
| #include "nnacl/fp16/cast_fp16.h" | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| int BatchnormFp16CPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) { | |||
| auto input = in_tensors_.at(0); | |||
| auto mean = in_tensors_.at(1); | |||
| auto variance = in_tensors_.at(2); | |||
| auto output = out_tensors_.at(0); | |||
| auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t)); | |||
| auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t)); | |||
| auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t)); | |||
| auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t)); | |||
| if (input_fp16 == nullptr || mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) { | |||
| context_->allocator->Free(input_fp16); | |||
| context_->allocator->Free(mean_fp16); | |||
| context_->allocator->Free(variance_fp16); | |||
| context_->allocator->Free(output_fp16); | |||
| } | |||
| Float32ToFloat16(reinterpret_cast<float *>(input->Data()), | |||
| reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(mean->Data()), | |||
| reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(variance->Data()), | |||
| reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum()); | |||
| BatchNormFp16(input_fp16, mean_fp16, variance_fp16, param, task_id, output_fp16); | |||
| Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output), | |||
| output->ElementsNum()); | |||
| context_->allocator->Free(input_fp16); | |||
| context_->allocator->Free(mean_fp16); | |||
| context_->allocator->Free(variance_fp16); | |||
| context_->allocator->Free(output_fp16); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| BatchNormFp16(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data()); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *opParameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| auto *kernel = new (std::nothrow) BatchnormFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchnormFp16CPUKernel fail!"; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| delete kernel; | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| // REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_BatchNorm, CpuBatchnormFp16KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/fp32/batchnorm.h" | |||
| namespace mindspore::kernel { | |||
| class BatchnormFp16CPUKernel : public BatchnormCPUKernel { | |||
| public: | |||
| BatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| virtual ~BatchnormFp16CPUKernel() {} | |||
| virtual int DoExecute(int task_id); | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_BATCHNORM_FP16_H_ | |||
| @@ -0,0 +1,103 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.h" | |||
| #include "nnacl/fp16/batchnorm_fp16.h" | |||
| #include "nnacl/fp16/cast_fp16.h" | |||
| #include "src/kernel_registry.h" | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| if (in_tensors_.at(0)->data_type() == kNumberTypeFloat32) { | |||
| auto input = in_tensors_.at(0); | |||
| auto scale = in_tensors_.at(1); | |||
| auto offset = in_tensors_.at(2); | |||
| auto mean = in_tensors_.at(3); | |||
| auto variance = in_tensors_.at(4); | |||
| auto output = out_tensors_.at(0); | |||
| auto input_fp16 = context_->allocator->Malloc(input->ElementsNum() * sizeof(float16_t)); | |||
| auto scale_fp16 = context_->allocator->Malloc(scale->ElementsNum() * sizeof(float16_t)); | |||
| auto offset_fp16 = context_->allocator->Malloc(offset->ElementsNum() * sizeof(float16_t)); | |||
| auto mean_fp16 = context_->allocator->Malloc(mean->ElementsNum() * sizeof(float16_t)); | |||
| auto variance_fp16 = context_->allocator->Malloc(variance->ElementsNum() * sizeof(float16_t)); | |||
| auto output_fp16 = context_->allocator->Malloc(output->ElementsNum() * sizeof(float16_t)); | |||
| if (input_fp16 == nullptr || scale_fp16 == nullptr || offset_fp16 == nullptr || | |||
| mean_fp16 == nullptr || variance_fp16 == nullptr || output_fp16 == nullptr) { | |||
| context_->allocator->Free(input_fp16); | |||
| context_->allocator->Free(scale_fp16); | |||
| context_->allocator->Free(offset_fp16); | |||
| context_->allocator->Free(mean_fp16); | |||
| context_->allocator->Free(variance_fp16); | |||
| context_->allocator->Free(output_fp16); | |||
| } | |||
| Float32ToFloat16(reinterpret_cast<float *>(input->Data()), | |||
| reinterpret_cast<float16_t *>(input_fp16), input->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(scale->Data()), | |||
| reinterpret_cast<float16_t *>(scale_fp16), scale->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(offset->Data()), | |||
| reinterpret_cast<float16_t *>(offset_fp16), offset->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(mean->Data()), | |||
| reinterpret_cast<float16_t *>(mean_fp16), mean->ElementsNum()); | |||
| Float32ToFloat16(reinterpret_cast<float *>(variance->Data()), | |||
| reinterpret_cast<float16_t *>(variance_fp16), variance->ElementsNum()); | |||
| FusedBatchNormFp16(input_fp16, scale_fp16, offset_fp16, mean_fp16, variance_fp16, param, task_id, | |||
| output_fp16); | |||
| Float16ToFloat32(reinterpret_cast<float16_t *>(output_fp16), reinterpret_cast<float *>(output), | |||
| output->ElementsNum()); | |||
| context_->allocator->Free(input_fp16); | |||
| context_->allocator->Free(scale_fp16); | |||
| context_->allocator->Free(offset_fp16); | |||
| context_->allocator->Free(mean_fp16); | |||
| context_->allocator->Free(variance_fp16); | |||
| context_->allocator->Free(output_fp16); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| FusedBatchNormFp16(in_tensors_.at(0)->Data(), scale_, offset_, mean_, variance_, param, task_id, | |||
| out_tensors_.at(0)->Data()); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuFusedBatchnormFp16KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| FusedBatchnormFp16CPUKernel *kernel = | |||
| new (std::nothrow) FusedBatchnormFp16CPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new FusedBatchnormFp16CPUKernel fail!"; | |||
| return nullptr; | |||
| } | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << op_parameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(op_parameter->type_)); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| } | |||
| // REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_FusedBatchNorm, CpuFusedBatchnormFp16KernelCreator) | |||
| } // namespace mindspore::kernel | |||
| @@ -0,0 +1,36 @@ | |||
| /** | |||
| * Copyright 2020 Huawei Technologies Co., Ltd | |||
| * | |||
| * Licensed under the Apache License, Version 2.0 (the "License"); | |||
| * you may not use this file except in compliance with the License. | |||
| * You may obtain a copy of the License at | |||
| * | |||
| * http://www.apache.org/licenses/LICENSE-2.0 | |||
| * | |||
| * Unless required by applicable law or agreed to in writing, software | |||
| * distributed under the License is distributed on an "AS IS" BASIS, | |||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_ | |||
| #include <vector> | |||
| #include "src/runtime/kernel/arm/fp32/fused_batchnorm.h" | |||
| namespace mindspore::kernel { | |||
| class FusedBatchnormFp16CPUKernel : public FusedBatchnormCPUKernel { | |||
| public: | |||
| FusedBatchnormFp16CPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : FusedBatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| virtual ~FusedBatchnormFp16CPUKernel() {} | |||
| virtual int DoExecute(int task_id); | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_FUSED_BATCHNORM_FP16_H_ | |||
| @@ -15,50 +15,12 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/batchnorm.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "nnacl/fp32/batchnorm.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_BatchNorm; | |||
| namespace mindspore::kernel { | |||
| BatchnormCPUKernel::~BatchnormCPUKernel() { | |||
| if (mean_addr_ != nullptr) { | |||
| free(mean_addr_); | |||
| mean_addr_ = nullptr; | |||
| } | |||
| if (var_addr_ != nullptr) { | |||
| free(var_addr_); | |||
| var_addr_ = nullptr; | |||
| } | |||
| } | |||
| int BatchnormCPUKernel::InitConstTensor() { | |||
| auto mean = in_tensors_[1]; | |||
| mean_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float))); | |||
| if (mean_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(mean_addr_, mean->Data(), mean->ElementsNum() * sizeof(float)); | |||
| auto variance = in_tensors_[2]; | |||
| var_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float))); | |||
| if (var_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| int BatchnormCPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| @@ -67,62 +29,72 @@ int BatchnormCPUKernel::Init() { | |||
| } | |||
| int BatchnormCPUKernel::ReSize() { | |||
| if (mean_addr_ != nullptr) { | |||
| free(mean_addr_); | |||
| mean_addr_ = nullptr; | |||
| FreeMeanAndVariance(); | |||
| FillParam(); | |||
| return InitConstTensor(); | |||
| } | |||
| void BatchnormCPUKernel::FreeMeanAndVariance() { | |||
| if (mean_ != nullptr) { | |||
| free(mean_); | |||
| mean_ = nullptr; | |||
| } | |||
| if (var_addr_ != nullptr) { | |||
| free(var_addr_); | |||
| var_addr_ = nullptr; | |||
| if (variance_ != nullptr) { | |||
| free(variance_); | |||
| variance_ = nullptr; | |||
| } | |||
| } | |||
| void BatchnormCPUKernel::FillParam() { | |||
| auto input_shapes = in_tensors_[0]->shape(); | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->unit_ = 1; | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| param->channel_ = input_shapes[n_dim - 1]; | |||
| param->unit_ = 1; | |||
| for (size_t i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| param->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); | |||
| } | |||
| auto ret = InitConstTensor(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Batchnorm fp32 InitConstTensor failed."; | |||
| int BatchnormCPUKernel::InitConstTensor() { | |||
| mean_ = malloc(in_tensors_[1]->Size()); | |||
| variance_ = malloc(in_tensors_[2]->Size()); | |||
| if (mean_ == nullptr || variance_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| FreeMeanAndVariance(); | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(mean_, in_tensors_[1]->Data(), in_tensors_[1]->Size()); | |||
| memcpy(variance_, in_tensors_[2]->Data(), in_tensors_[2]->Size()); | |||
| return RET_OK; | |||
| } | |||
| int BatchnormCPUKernel::DoExecute(int task_id) { | |||
| BatchNorm(out_addr_, in_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); | |||
| return RET_OK; | |||
| } | |||
| int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto g_kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata); | |||
| auto ret = g_kernel->DoExecute(task_id); | |||
| int BatchnormCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| ret = LiteBackendParallelLaunch(BatchNormRun, this, op_parameter_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| } | |||
| return ret; | |||
| } | |||
| int BatchnormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_addr_ = reinterpret_cast<float *>(in_tensors_.at(0)->Data()); | |||
| out_addr_ = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | |||
| int BatchnormCPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| BatchNormFp32(in_tensors_.at(0)->Data(), mean_, variance_, param, task_id, out_tensors_.at(0)->Data()); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| int ret = LiteBackendParallelLaunch(BatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); | |||
| int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto kernel = reinterpret_cast<BatchnormCPUKernel *>(cdata); | |||
| auto ret = kernel->DoExecute(task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; | |||
| return ret; | |||
| MS_LOG(ERROR) << "BatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| } | |||
| return RET_OK; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| @@ -131,7 +103,6 @@ kernel::LiteKernel *CpuBatchnormKernelCreator(const std::vector<lite::tensor::Te | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_BatchNorm); | |||
| auto *kernel = new (std::nothrow) BatchnormCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "new BatchNormCPUKernel fail!"; | |||
| @@ -22,6 +22,7 @@ | |||
| #include "include/context.h" | |||
| #include "nnacl/fp32/batchnorm.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| using mindspore::lite::Context; | |||
| @@ -31,24 +32,23 @@ class BatchnormCPUKernel : public LiteKernel { | |||
| BatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~BatchnormCPUKernel() override; | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| virtual ~BatchnormCPUKernel() { FreeMeanAndVariance(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor(); | |||
| int DoExecute(int tid); | |||
| private: | |||
| float *in_addr_ = nullptr; | |||
| float *mean_addr_ = nullptr; | |||
| float *var_addr_ = nullptr; | |||
| float *out_addr_ = nullptr; | |||
| BatchNormParameter *batchnorm_param_; | |||
| virtual int InitConstTensor(); | |||
| virtual int DoExecute(int task_id); | |||
| protected: | |||
| void FillParam(); | |||
| void FreeMeanAndVariance(); | |||
| void *mean_ = nullptr; | |||
| void *variance_ = nullptr; | |||
| }; | |||
| int BatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata); | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_BATCHNORM_H_ | |||
| @@ -15,133 +15,59 @@ | |||
| */ | |||
| #include "src/runtime/kernel/arm/fp32/fused_batchnorm.h" | |||
| #include "schema/model_generated.h" | |||
| #include "src/kernel_registry.h" | |||
| #include "include/errorcode.h" | |||
| #include "src/runtime/runtime_api.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "nnacl/fp32/batchnorm.h" | |||
| using mindspore::kernel::KERNEL_ARCH::kCPU; | |||
| using mindspore::lite::KernelRegistrar; | |||
| using mindspore::lite::RET_ERROR; | |||
| using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_FusedBatchNorm; | |||
| namespace mindspore::kernel { | |||
| FusedBatchnormCPUKernel::~FusedBatchnormCPUKernel() { FreeTmpBuffer(); } | |||
| int FusedBatchnormCPUKernel::ReSize() { | |||
| FreeMeanAndVariance(); | |||
| FreeScaleAndOffset(); | |||
| FillParam(); | |||
| return InitConstTensor(); | |||
| } | |||
| void FusedBatchnormCPUKernel::FreeTmpBuffer() { | |||
| if (scale_addr_ != nullptr) { | |||
| free(scale_addr_); | |||
| scale_addr_ = nullptr; | |||
| } | |||
| if (offset_addr_ != nullptr) { | |||
| free(offset_addr_); | |||
| offset_addr_ = nullptr; | |||
| void FusedBatchnormCPUKernel::FreeScaleAndOffset() { | |||
| if (scale_ != nullptr) { | |||
| free(scale_); | |||
| scale_ = nullptr; | |||
| } | |||
| if (mean_addr_ != nullptr) { | |||
| free(mean_addr_); | |||
| mean_addr_ = nullptr; | |||
| } | |||
| if (var_addr_ != nullptr) { | |||
| free(var_addr_); | |||
| var_addr_ = nullptr; | |||
| if (offset_ != nullptr) { | |||
| free(offset_); | |||
| offset_ = nullptr; | |||
| } | |||
| } | |||
| int FusedBatchnormCPUKernel::InitConstTensor() { | |||
| auto scale = in_tensors_[1]; | |||
| scale_addr_ = reinterpret_cast<float *>(malloc(scale->ElementsNum() * sizeof(float))); | |||
| if (scale_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(scale_addr_, scale->Data(), scale->ElementsNum() * sizeof(float)); | |||
| auto offset = in_tensors_[2]; | |||
| offset_addr_ = reinterpret_cast<float *>(malloc(offset->ElementsNum() * sizeof(float))); | |||
| if (offset_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(offset_addr_, offset->Data(), offset->ElementsNum() * sizeof(float)); | |||
| auto mean = in_tensors_[3]; | |||
| mean_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float))); | |||
| if (mean_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(mean_addr_, mean->Data(), mean->ElementsNum() * sizeof(float)); | |||
| auto variance = in_tensors_[4]; | |||
| var_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float))); | |||
| if (var_addr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(var_addr_, variance->Data(), variance->ElementsNum() * sizeof(float)); | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| scale_ = malloc(scale->Size()); | |||
| offset_ = malloc(offset->Size()); | |||
| mean_ = malloc(mean->Size()); | |||
| variance_ = malloc(variance->Size()); | |||
| int FusedBatchnormCPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| auto input_shapes = in_tensors_[0]->shape(); | |||
| auto n_dim = input_shapes.size(); | |||
| batchnorm_param_->channel_ = input_shapes[n_dim - 1]; | |||
| batchnorm_param_->unit_ = 1; | |||
| for (size_t i = 0; i < n_dim - 1; i++) { | |||
| batchnorm_param_->unit_ *= input_shapes[i]; | |||
| } | |||
| batchnorm_param_->op_parameter_.thread_num_ = | |||
| MSMIN(batchnorm_param_->op_parameter_.thread_num_, batchnorm_param_->channel_); | |||
| auto ret = InitConstTensor(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "FusedBatchnorm fp32 InitConstTensor failed."; | |||
| if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) { | |||
| FreeMeanAndVariance(); | |||
| FreeScaleAndOffset(); | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(scale_, scale->Data(), scale->Size()); | |||
| memcpy(offset_, offset->Data(), offset->Size()); | |||
| memcpy(mean_, mean->Data(), mean->Size()); | |||
| memcpy(variance_, variance->Data(), variance->Size()); | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Execute(int task_id) { | |||
| FusedBatchNorm(out_addr_, in_addr_, scale_addr_, offset_addr_, mean_addr_, var_addr_, task_id, batchnorm_param_); | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchNormRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| auto g_kernel = reinterpret_cast<FusedBatchnormCPUKernel *>(cdata); | |||
| auto ret = g_kernel->Execute(task_id); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "FusedBatchnormRun error task_id[" << task_id << "] error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int FusedBatchnormCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Prepare fail! Ret error code: " << prepare_ret; | |||
| return prepare_ret; | |||
| } | |||
| in_addr_ = reinterpret_cast<float *>(in_tensors_.at(0)->Data()); | |||
| out_addr_ = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | |||
| int ret = LiteBackendParallelLaunch(FusedBatchNormRun, this, batchnorm_param_->op_parameter_.thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "FusedBatchnormRun error error_code[" << ret << "]"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| int FusedBatchnormCPUKernel::DoExecute(int task_id) { | |||
| auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_); | |||
| FusedBatchNormFp32(in_tensors_.at(0)->Data(), scale_, offset_, mean_, variance_, param, task_id, | |||
| out_tensors_.at(0)->Data()); | |||
| return mindspore::lite::RET_OK; | |||
| } | |||
| kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| @@ -149,11 +75,6 @@ kernel::LiteKernel *CpuFusedBatchnormKernelCreator(const std::vector<lite::tenso | |||
| OpParameter *op_parameter, const lite::Context *ctx, | |||
| const kernel::KernelKey &desc, | |||
| const mindspore::lite::PrimitiveC *primitive) { | |||
| if (op_parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_FusedBatchNorm); | |||
| FusedBatchnormCPUKernel *kernel = | |||
| new (std::nothrow) FusedBatchnormCPUKernel(op_parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| @@ -18,37 +18,26 @@ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_FUSED_BATCHNORM_H_ | |||
| #include <vector> | |||
| #include "src/lite_kernel.h" | |||
| #include "nnacl/batchnorm_parameter.h" | |||
| #include "src/runtime/kernel/arm/fp32/batchnorm.h" | |||
| namespace mindspore::kernel { | |||
| class FusedBatchnormCPUKernel : public LiteKernel { | |||
| class FusedBatchnormCPUKernel : public BatchnormCPUKernel { | |||
| public: | |||
| FusedBatchnormCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const mindspore::lite::PrimitiveC *primitive) | |||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| batchnorm_param_ = reinterpret_cast<BatchNormParameter *>(parameter); | |||
| } | |||
| ~FusedBatchnormCPUKernel() override; | |||
| : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int InitConstTensor(); | |||
| int Execute(int task_id); | |||
| int InitConstTensor() override; | |||
| int DoExecute(int task_id) override; | |||
| private: | |||
| void FreeTmpBuffer(); | |||
| float *in_addr_ = nullptr; | |||
| float *mean_addr_ = nullptr; | |||
| float *var_addr_ = nullptr; | |||
| float *scale_addr_ = nullptr; | |||
| float *offset_addr_ = nullptr; | |||
| float *out_addr_ = nullptr; | |||
| BatchNormParameter *batchnorm_param_; | |||
| protected: | |||
| void FreeScaleAndOffset(); | |||
| void *scale_ = nullptr; | |||
| void *offset_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -31,40 +31,32 @@ TEST_F(TestBatchnormFp32, BNTest) { | |||
| -1.1983503, -6.6790967, 6.383416, -13.3213005, -8.693595, 9.476344}; | |||
| std::vector<float> in_data1 = {12.352293, 5.122387, 14.249514}; | |||
| std::vector<float> in_data2 = {14.632595, 0.70900035, 11.179003}; | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||
| BatchNormParameter op_param; | |||
| op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; | |||
| op_param.epsilon_ = 0.001f; | |||
| std::vector<int> shape = {1, 2, 2, 3}; | |||
| lite::tensor::Tensor input0_tensor; | |||
| lite::tensor::Tensor input1_tensor; | |||
| lite::tensor::Tensor input2_tensor; | |||
| inputs_tensor.push_back(&input0_tensor); | |||
| inputs_tensor.push_back(&input1_tensor); | |||
| inputs_tensor.push_back(&input2_tensor); | |||
| lite::tensor::Tensor input0_tensor(kNumberTypeFloat32, {1, 2, 2, 3}); | |||
| lite::tensor::Tensor input1_tensor(kNumberTypeFloat32, {3}); | |||
| lite::tensor::Tensor input2_tensor(kNumberTypeFloat32, {3}); | |||
| input0_tensor.SetData(in_data.data()); | |||
| input1_tensor.SetData(in_data1.data()); | |||
| input2_tensor.SetData(in_data2.data()); | |||
| input0_tensor.set_shape(shape); | |||
| input1_tensor.set_shape({3}); | |||
| input2_tensor.set_shape({3}); | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0_tensor, &input1_tensor, &input2_tensor}; | |||
| std::vector<float> output(12); | |||
| std::vector<float> corr_out = {-6.1533737, 7.4904885, -0.8563998, -0.289212, -9.356432, 0.13245535, | |||
| -3.5422924, -14.005781, -2.3525476, -6.7113695, -16.396551, -1.4275324}; | |||
| lite::tensor::Tensor output0_tensor; | |||
| outputs_tensor.push_back(&output0_tensor); | |||
| lite::tensor::Tensor output0_tensor(kNumberTypeFloat32, {1, 2, 2, 3}); | |||
| output0_tensor.SetData(output.data()); | |||
| output0_tensor.set_shape(shape); | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0_tensor}; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| lite::Context ctx; | |||
| ctx.thread_num_ = 1; | |||
| ctx.thread_num_ = 2; | |||
| kernel::LiteKernel *kernel = | |||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| @@ -82,7 +74,6 @@ TEST_F(TestBatchnormFp32, BNTest) { | |||
| input1_tensor.SetData(nullptr); | |||
| input2_tensor.SetData(nullptr); | |||
| output0_tensor.SetData(nullptr); | |||
| MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; | |||
| } | |||
| TEST_F(TestBatchnormFp32, FusedBNTest) { | |||
| @@ -92,118 +83,102 @@ TEST_F(TestBatchnormFp32, FusedBNTest) { | |||
| std::vector<float> offset = {27.888096, 24.533648, 15.335093}; | |||
| std::vector<float> mean = {11.5127125, 0.47681615, 5.851508}; | |||
| std::vector<float> var = {1.270583, 13.005714, 6.089223}; | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||
| BatchNormParameter op_param; | |||
| op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; | |||
| op_param.epsilon_ = 0.001f; | |||
| std::vector<int> shape = {1, 2, 2, 3}; | |||
| lite::tensor::Tensor input[5]; | |||
| input[0].SetData(in_data.data()); | |||
| input[1].SetData(scale.data()); | |||
| input[2].SetData(offset.data()); | |||
| input[3].SetData(mean.data()); | |||
| input[4].SetData(var.data()); | |||
| input[0].set_shape(shape); | |||
| for (int i = 1; i < 5; i++) { | |||
| input[i].set_shape({3}); | |||
| } | |||
| for (int i = 0; i < 5; i++) { | |||
| inputs_tensor.push_back(&input[i]); | |||
| } | |||
| lite::tensor::Tensor input0(kNumberTypeFloat32, {1, 2, 2, 3}); | |||
| lite::tensor::Tensor input1(kNumberTypeFloat32, {3}); | |||
| lite::tensor::Tensor input2(kNumberTypeFloat32, {3}); | |||
| lite::tensor::Tensor input3(kNumberTypeFloat32, {3}); | |||
| lite::tensor::Tensor input4(kNumberTypeFloat32, {3}); | |||
| input0.SetData(in_data.data()); | |||
| input1.SetData(scale.data()); | |||
| input2.SetData(offset.data()); | |||
| input3.SetData(mean.data()); | |||
| input4.SetData(var.data()); | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0, &input1, &input2, &input3, &input4}; | |||
| std::vector<float> output(12); | |||
| std::vector<float> corr_out = {-195.5765, 67.03745, -4.243883, -42.028015, 74.37044, 9.075897, | |||
| 5.1857452, 56.60399, -77.215096, -181.18402, 49.81066, -59.204563}; | |||
| lite::tensor::Tensor output0_tensor; | |||
| outputs_tensor.push_back(&output0_tensor); | |||
| output0_tensor.SetData(output.data()); | |||
| output0_tensor.set_shape(shape); | |||
| lite::tensor::Tensor output0(kNumberTypeFloat32, {1, 2, 2, 3}); | |||
| output0.SetData(output.data()); | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0}; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| lite::Context ctx; | |||
| ctx.thread_num_ = 1; | |||
| ctx.thread_num_ = 2; | |||
| kernel::LiteKernel *kernel = | |||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| auto output_tensor_shape = output0_tensor.shape(); | |||
| kernel->Run(); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < output0_tensor.ElementsNum(); i++) { | |||
| for (int i = 0; i < output0.ElementsNum(); i++) { | |||
| std::cout << output[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); | |||
| for (int i = 1; i < 5; i++) { | |||
| input[i].SetData(nullptr); | |||
| } | |||
| output0_tensor.SetData(nullptr); | |||
| MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed"; | |||
| CompareOutputData(output.data(), corr_out.data(), output0.ElementsNum(), 0.001); | |||
| input0.SetData(nullptr); | |||
| input1.SetData(nullptr); | |||
| input2.SetData(nullptr); | |||
| input3.SetData(nullptr); | |||
| input4.SetData(nullptr); | |||
| output0.SetData(nullptr); | |||
| } | |||
| TEST_F(TestBatchnormFp32, easyTest) { | |||
| std::vector<float> in_data = {1, 4, 2, 5, 3, 6, -1, -4, -2, -5, -3, -6}; | |||
| std::vector<float> in_data1 = {0.1, 0.6}; | |||
| std::vector<float> in_data2 = {3, 4}; | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||
| BatchNormParameter op_param; | |||
| op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; | |||
| op_param.epsilon_ = 0.001f; | |||
| std::vector<int> shape = {1, 1, 6, 2}; | |||
| lite::tensor::Tensor input0_tensor; | |||
| lite::tensor::Tensor input1_tensor; | |||
| lite::tensor::Tensor input2_tensor; | |||
| inputs_tensor.push_back(&input0_tensor); | |||
| inputs_tensor.push_back(&input1_tensor); | |||
| inputs_tensor.push_back(&input2_tensor); | |||
| input0_tensor.SetData(in_data.data()); | |||
| input1_tensor.SetData(in_data1.data()); | |||
| input2_tensor.SetData(in_data2.data()); | |||
| input0_tensor.set_shape(shape); | |||
| input1_tensor.set_shape({2}); | |||
| input2_tensor.set_shape({2}); | |||
| lite::tensor::Tensor input0(kNumberTypeFloat32, {1, 1, 6, 2}); | |||
| lite::tensor::Tensor input1(kNumberTypeFloat32, {2}); | |||
| lite::tensor::Tensor input2(kNumberTypeFloat32, {2}); | |||
| input0.SetData(in_data.data()); | |||
| input1.SetData(in_data1.data()); | |||
| input2.SetData(in_data2.data()); | |||
| std::vector<lite::tensor::Tensor *> inputs_tensor = {&input0, &input1, &input2}; | |||
| std::vector<float> output(12); | |||
| std::vector<float> corr_out = {0.519529, 1.69979, 1.09678, 2.19973, 1.67404, 2.69966, | |||
| -0.63498, -2.29971, -1.21223, -2.79965, -1.78949, -3.29959}; | |||
| lite::tensor::Tensor output0_tensor; | |||
| outputs_tensor.push_back(&output0_tensor); | |||
| output0_tensor.SetData(output.data()); | |||
| output0_tensor.set_shape(shape); | |||
| lite::tensor::Tensor output0(kNumberTypeFloat32, {1, 1, 6, 2}); | |||
| output0.SetData(output.data()); | |||
| std::vector<lite::tensor::Tensor *> outputs_tensor = {&output0}; | |||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_BatchNorm}; | |||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||
| ASSERT_NE(creator, nullptr); | |||
| lite::Context ctx; | |||
| ctx.thread_num_ = 1; | |||
| ctx.thread_num_ = 2; | |||
| kernel::LiteKernel *kernel = | |||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr); | |||
| ASSERT_NE(kernel, nullptr); | |||
| auto output_tensor_shape = output0_tensor.shape(); | |||
| kernel->Run(); | |||
| printf("==================output data=================\n"); | |||
| for (int i = 0; i < output0_tensor.ElementsNum(); i++) { | |||
| for (int i = 0; i < output0.ElementsNum(); i++) { | |||
| std::cout << output[i] << " ,"; | |||
| } | |||
| std::cout << std::endl; | |||
| CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); | |||
| CompareOutputData(output.data(), corr_out.data(), output0.ElementsNum(), 0.001); | |||
| input0_tensor.SetData(nullptr); | |||
| input1_tensor.SetData(nullptr); | |||
| input2_tensor.SetData(nullptr); | |||
| output0_tensor.SetData(nullptr); | |||
| MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; | |||
| input0.SetData(nullptr); | |||
| input1.SetData(nullptr); | |||
| input2.SetData(nullptr); | |||
| output0.SetData(nullptr); | |||
| } | |||
| } // namespace mindspore | |||