Merge pull request !4410 from yangruoqi713/lite (tags/v0.7.0-beta)
| @@ -48,8 +48,8 @@ int DeconvDepthwiseConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std | |||||
| pad_u_ = conv_prim->padUp(); | pad_u_ = conv_prim->padUp(); | ||||
| pad_d_ = conv_prim->padDown(); | pad_d_ = conv_prim->padDown(); | ||||
| pad_r_ = conv_prim->padRight(); | pad_r_ = conv_prim->padRight(); | ||||
| output_h = conv_prim->strideH() * (input_h - 1) * conv_prim->kernelH() - pad_u_ - pad_d_; | |||||
| output_w = conv_prim->strideW() * (input_w - 1) * conv_prim->kernelW() - pad_l_ - pad_r_; | |||||
| output_h = conv_prim->strideH() * (input_h - 1) + conv_prim->kernelH() - pad_u_ - pad_d_; | |||||
| output_w = conv_prim->strideW() * (input_w - 1) + conv_prim->kernelW() - pad_l_ - pad_r_; | |||||
| if ((output_h + conv_prim->padUp() + conv_prim->padDown() - conv_prim->kernelH()) % conv_prim->strideH() != 0) { | if ((output_h + conv_prim->padUp() + conv_prim->padDown() - conv_prim->kernelH()) % conv_prim->strideH() != 0) { | ||||
| output_h += (output_h + conv_prim->padLeft() + conv_prim->padRight() - conv_prim->kernelH()) % conv_prim->strideH(); | output_h += (output_h + conv_prim->padLeft() + conv_prim->padRight() - conv_prim->kernelH()) % conv_prim->strideH(); | ||||
| } | } | ||||
| @@ -42,10 +42,10 @@ class BatchnormCPUKernel : public LiteKernel { | |||||
| int DoExecute(int tid); | int DoExecute(int tid); | ||||
| private: | private: | ||||
| float *in_addr_; | |||||
| float *mean_addr_; | |||||
| float *var_addr_; | |||||
| float *out_addr_; | |||||
| float *in_addr_ = nullptr; | |||||
| float *mean_addr_ = nullptr; | |||||
| float *var_addr_ = nullptr; | |||||
| float *out_addr_ = nullptr; | |||||
| BatchNormParameter *batchnorm_param_; | BatchNormParameter *batchnorm_param_; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -40,10 +40,10 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| SlidingWindowParam *sliding_; | |||||
| float *packed_weight_; | |||||
| float *packed_input_; | |||||
| float *packed_output_; | |||||
| SlidingWindowParam *sliding_ = nullptr; | |||||
| float *packed_weight_ = nullptr; | |||||
| float *packed_input_ = nullptr; | |||||
| float *packed_output_ = nullptr; | |||||
| bool need_align_ = false; | bool need_align_ = false; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -49,11 +49,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| float *packed_weight_; | |||||
| float *packed_input_; | |||||
| float *packed_output_; | |||||
| float *block_buffer_; | |||||
| float *trans_buffer_; | |||||
| float *packed_weight_ = nullptr; | |||||
| float *packed_input_ = nullptr; | |||||
| float *packed_output_ = nullptr; | |||||
| float *block_buffer_ = nullptr; | |||||
| float *trans_buffer_ = nullptr; | |||||
| int trans_size_; | int trans_size_; | ||||
| bool need_align_ = false; | bool need_align_ = false; | ||||
| }; | }; | ||||
| @@ -40,12 +40,12 @@ class FusedBatchnormCPUKernel : public LiteKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| float *in_addr_; | |||||
| float *mean_addr_; | |||||
| float *var_addr_; | |||||
| float *scale_addr_; | |||||
| float *offset_addr_; | |||||
| float *out_addr_; | |||||
| float *in_addr_ = nullptr; | |||||
| float *mean_addr_ = nullptr; | |||||
| float *var_addr_ = nullptr; | |||||
| float *scale_addr_ = nullptr; | |||||
| float *offset_addr_ = nullptr; | |||||
| float *out_addr_ = nullptr; | |||||
| BatchNormParameter *batchnorm_param_; | BatchNormParameter *batchnorm_param_; | ||||
| }; | }; | ||||
| @@ -28,9 +28,7 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Scale; | using mindspore::schema::PrimitiveType_Scale; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); } | |||||
| void ScaleCPUKernel::FreeTmpBuffer() { | |||||
| ScaleCPUKernel::~ScaleCPUKernel() { | |||||
| if (scale_param_->const_scale_) { | if (scale_param_->const_scale_) { | ||||
| if (scale_ != nullptr) { | if (scale_ != nullptr) { | ||||
| free(scale_); | free(scale_); | ||||
| @@ -46,7 +44,6 @@ void ScaleCPUKernel::FreeTmpBuffer() { | |||||
| } | } | ||||
| int ScaleCPUKernel::InitScaleOffset() { | int ScaleCPUKernel::InitScaleOffset() { | ||||
| FreeTmpBuffer(); | |||||
| auto scale_tensor = in_tensors_.at(1); | auto scale_tensor = in_tensors_.at(1); | ||||
| float *scale_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->Data()); | float *scale_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->Data()); | ||||
| if (scale_ptr != nullptr) { | if (scale_ptr != nullptr) { | ||||
| @@ -116,10 +113,7 @@ int ScaleCPUKernel::Init() { | |||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int ScaleCPUKernel::ReSize() { | |||||
| auto ret = InitParameter(); | auto ret = InitParameter(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Scale fp32 InitParameter failed."; | MS_LOG(ERROR) << "Scale fp32 InitParameter failed."; | ||||
| @@ -134,6 +128,15 @@ int ScaleCPUKernel::ReSize() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int ScaleCPUKernel::ReSize() { | |||||
| auto ret = InitParameter(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Scale fp32 InitParameter failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ScaleCPUKernel::Scale(int task_id) { | int ScaleCPUKernel::Scale(int task_id) { | ||||
| auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_); | auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -41,11 +41,10 @@ class ScaleCPUKernel : public LiteKernel { | |||||
| int Scale(int task_id); | int Scale(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| float *input_ptr_; | |||||
| float *scale_; | |||||
| float *offset_; | |||||
| float *output_ptr_; | |||||
| float *input_ptr_ = nullptr; | |||||
| float *scale_ = nullptr; | |||||
| float *offset_ = nullptr; | |||||
| float *output_ptr_ = nullptr; | |||||
| ScaleParameter *scale_param_; | ScaleParameter *scale_param_; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -84,4 +84,67 @@ TEST_F(TestBatchnormFp32, BNTest) { | |||||
| output0_tensor.SetData(nullptr); | output0_tensor.SetData(nullptr); | ||||
| MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; | MS_LOG(INFO) << "TestBathNormFp32 accuracy passed"; | ||||
| } | } | ||||
| TEST_F(TestBatchnormFp32, FusedBNTest) { | |||||
| std::vector<float> in_data = {-7.400094, 11.37495, 2.0271842, 5.5954003, 13.255154, 4.6289115, | |||||
| 9.591311, 8.699771, -12.226144, -6.1819935, 6.957936, -8.70818}; | |||||
| std::vector<float> scale = {13.323708, 14.0656395, 12.634319}; | |||||
| std::vector<float> offset = {27.888096, 24.533648, 15.335093}; | |||||
| std::vector<float> mean = {11.5127125, 0.47681615, 5.851508}; | |||||
| std::vector<float> var = {1.270583, 13.005714, 6.089223}; | |||||
| std::vector<lite::tensor::Tensor *> inputs_tensor; | |||||
| std::vector<lite::tensor::Tensor *> outputs_tensor; | |||||
| BatchNormParameter op_param; | |||||
| op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm; | |||||
| op_param.epsilon_ = 0.001f; | |||||
| std::vector<int> shape = {1, 2, 2, 3}; | |||||
| lite::tensor::Tensor input[5]; | |||||
| input[0].SetData(in_data.data()); | |||||
| input[1].SetData(scale.data()); | |||||
| input[2].SetData(offset.data()); | |||||
| input[3].SetData(mean.data()); | |||||
| input[4].SetData(var.data()); | |||||
| input[0].set_shape(shape); | |||||
| for (int i = 1; i < 5; i++) { | |||||
| input[i].set_shape({3}); | |||||
| } | |||||
| for (int i = 0; i < 5; i++) { | |||||
| inputs_tensor.push_back(&input[i]); | |||||
| } | |||||
| std::vector<float> output(12); | |||||
| std::vector<float> corr_out = {-195.5765, 67.03745, -4.243883, -42.028015, 74.37044, 9.075897, | |||||
| 5.1857452, 56.60399, -77.215096, -181.18402, 49.81066, -59.204563}; | |||||
| lite::tensor::Tensor output0_tensor; | |||||
| outputs_tensor.push_back(&output0_tensor); | |||||
| output0_tensor.SetData(output.data()); | |||||
| output0_tensor.set_shape(shape); | |||||
| kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm}; | |||||
| auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); | |||||
| ASSERT_NE(creator, nullptr); | |||||
| lite::Context ctx; | |||||
| ctx.thread_num_ = 1; | |||||
| kernel::LiteKernel *kernel = | |||||
| creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr); | |||||
| ASSERT_NE(kernel, nullptr); | |||||
| auto output_tensor_shape = output0_tensor.shape(); | |||||
| kernel->Run(); | |||||
| printf("==================output data=================\n"); | |||||
| for (int i = 0; i < output0_tensor.ElementsNum(); i++) { | |||||
| std::cout << output[i] << " ,"; | |||||
| } | |||||
| std::cout << std::endl; | |||||
| CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001); | |||||
| for (int i = 1; i < 5; i++) { | |||||
| input[i].SetData(nullptr); | |||||
| } | |||||
| output0_tensor.SetData(nullptr); | |||||
| MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed"; | |||||
| } | |||||
| } // namespace mindspore | } // namespace mindspore | ||||