
!4410 [MS][LITE] fix bug of arm cpu fp32 op: conv depthwise; rewrite member variables of some ops

Merge pull request !4410 from yangruoqi713/lite
tags/v0.7.0-beta
mindspore-ci-bot · 5 years ago · commit eea10fac98
8 changed files with 98 additions and 33 deletions
  1. mindspore/lite/src/ops/deconvolution_depthwise.cc (+2 -2)
  2. mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h (+4 -4)
  3. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h (+4 -4)
  4. mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h (+5 -5)
  5. mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h (+6 -6)
  6. mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc (+10 -7)
  7. mindspore/lite/src/runtime/kernel/arm/fp32/scale.h (+4 -5)
  8. mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc (+63 -0)

mindspore/lite/src/ops/deconvolution_depthwise.cc (+2 -2)

@@ -48,8 +48,8 @@ int DeconvDepthwiseConv2D::InferShape(std::vector<tensor::Tensor *> inputs_, std
   pad_u_ = conv_prim->padUp();
   pad_d_ = conv_prim->padDown();
   pad_r_ = conv_prim->padRight();
-  output_h = conv_prim->strideH() * (input_h - 1) * conv_prim->kernelH() - pad_u_ - pad_d_;
-  output_w = conv_prim->strideW() * (input_w - 1) * conv_prim->kernelW() - pad_l_ - pad_r_;
+  output_h = conv_prim->strideH() * (input_h - 1) + conv_prim->kernelH() - pad_u_ - pad_d_;
+  output_w = conv_prim->strideW() * (input_w - 1) + conv_prim->kernelW() - pad_l_ - pad_r_;
   if ((output_h + conv_prim->padUp() + conv_prim->padDown() - conv_prim->kernelH()) % conv_prim->strideH() != 0) {
     output_h += (output_h + conv_prim->padLeft() + conv_prim->padRight() - conv_prim->kernelH()) % conv_prim->strideH();
   }
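Note on the fix above: for a transposed (deconvolution) layer, each output axis is stride * (input - 1) + kernel - pad_total; the old code multiplied by the kernel size instead of adding it, inflating the inferred shape. A minimal standalone C++ sketch of the corrected formula (the helper name is hypothetical, not repo code):

#include <cassert>

// Expected output extent of a transposed convolution along one axis.
int DeconvOutputSize(int input, int stride, int kernel, int pad_total) {
  // stride * (input - 1) spaces the input elements apart; the kernel then
  // contributes its full extent once; padding is trimmed off both ends.
  return stride * (input - 1) + kernel - pad_total;
}

int main() {
  assert(DeconvOutputSize(4, 2, 3, 0) == 9);  // corrected: 2 * 3 + 3 = 9
  // The old form, stride * (input - 1) * kernel - pads, would yield 18.
  return 0;
}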


mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm.h (+4 -4)

@@ -42,10 +42,10 @@ class BatchnormCPUKernel : public LiteKernel {
   int DoExecute(int tid);

  private:
-  float *in_addr_;
-  float *mean_addr_;
-  float *var_addr_;
-  float *out_addr_;
+  float *in_addr_ = nullptr;
+  float *mean_addr_ = nullptr;
+  float *var_addr_ = nullptr;
+  float *out_addr_ = nullptr;
   BatchNormParameter *batchnorm_param_;
 };
 }  // namespace mindspore::kernel
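Note: this and the other header changes below apply the same C++11 default member initializer pattern, so raw pointers read as nullptr before Init() runs and conditional cleanup stays well defined. A minimal sketch of the pattern (hypothetical class, not repo code):

#include <cstdlib>

class KernelSketch {
 public:
  void Init() { buf_ = static_cast<float *>(malloc(16 * sizeof(float))); }
  ~KernelSketch() {
    if (buf_ != nullptr) {  // well defined even if Init() was never called
      free(buf_);
    }
  }

 private:
  float *buf_ = nullptr;  // previously left uninitialized (indeterminate value)
};

int main() {
  KernelSketch k;  // destruction without Init() no longer reads garbage
  return 0;
}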


mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise.h (+4 -4)

@@ -40,10 +40,10 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
   int Execute(int task_id);

  private:
-  SlidingWindowParam *sliding_;
-  float *packed_weight_;
-  float *packed_input_;
-  float *packed_output_;
+  SlidingWindowParam *sliding_ = nullptr;
+  float *packed_weight_ = nullptr;
+  float *packed_input_ = nullptr;
+  float *packed_output_ = nullptr;
   bool need_align_ = false;
 };
 }  // namespace mindspore::kernel


mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3.h (+5 -5)

@@ -49,11 +49,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
   int Execute(int task_id);

  private:
-  float *packed_weight_;
-  float *packed_input_;
-  float *packed_output_;
-  float *block_buffer_;
-  float *trans_buffer_;
+  float *packed_weight_ = nullptr;
+  float *packed_input_ = nullptr;
+  float *packed_output_ = nullptr;
+  float *block_buffer_ = nullptr;
+  float *trans_buffer_ = nullptr;
   int trans_size_;
   bool need_align_ = false;
 };


mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm.h (+6 -6)

@@ -40,12 +40,12 @@ class FusedBatchnormCPUKernel : public LiteKernel {
   int Execute(int task_id);

  private:
-  float *in_addr_;
-  float *mean_addr_;
-  float *var_addr_;
-  float *scale_addr_;
-  float *offset_addr_;
-  float *out_addr_;
+  float *in_addr_ = nullptr;
+  float *mean_addr_ = nullptr;
+  float *var_addr_ = nullptr;
+  float *scale_addr_ = nullptr;
+  float *offset_addr_ = nullptr;
+  float *out_addr_ = nullptr;

   BatchNormParameter *batchnorm_param_;
 };


mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc (+10 -7)

@@ -28,9 +28,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Scale;

 namespace mindspore::kernel {
-ScaleCPUKernel::~ScaleCPUKernel() { FreeTmpBuffer(); }
-
-void ScaleCPUKernel::FreeTmpBuffer() {
+ScaleCPUKernel::~ScaleCPUKernel() {
   if (scale_param_->const_scale_) {
     if (scale_ != nullptr) {
       free(scale_);
@@ -46,7 +44,6 @@ void ScaleCPUKernel::FreeTmpBuffer() {
 }

 int ScaleCPUKernel::InitScaleOffset() {
-  FreeTmpBuffer();
   auto scale_tensor = in_tensors_.at(1);
   float *scale_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->Data());
   if (scale_ptr != nullptr) {
@@ -116,10 +113,7 @@ int ScaleCPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
   }
-  return ReSize();
-}
-
-int ScaleCPUKernel::ReSize() {
   auto ret = InitParameter();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
@@ -134,6 +128,15 @@ int ScaleCPUKernel::ReSize() {
   return RET_OK;
 }

+int ScaleCPUKernel::ReSize() {
+  auto ret = InitParameter();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 int ScaleCPUKernel::Scale(int task_id) {
   auto ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, scale_param_);
   if (ret != RET_OK) {
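Note: besides folding FreeTmpBuffer() into the destructor, the rewrite separates Init() from ReSize(): Init() returns early until shapes are inferred, and both paths run InitParameter() so shape-dependent state is rebuilt when inputs change. A minimal sketch of that contract (hypothetical names, not repo code):

struct LifecycleSketch {
  bool shapes_known = false;

  int Init() {
    if (!shapes_known) {
      return 0;  // deferred: the framework calls ReSize() once shapes land
    }
    return Prepare();
  }
  int ReSize() { return Prepare(); }  // input shapes changed: recompute
  int Prepare() { return 0; }         // stands in for InitParameter()
};

int main() {
  LifecycleSketch k;
  k.Init();  // no-op until shapes are known
  k.shapes_known = true;
  k.ReSize();  // rebuilds parameters after shape inference
  return 0;
}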


mindspore/lite/src/runtime/kernel/arm/fp32/scale.h (+4 -5)

@@ -41,11 +41,10 @@ class ScaleCPUKernel : public LiteKernel {
   int Scale(int task_id);

  private:
-  void FreeTmpBuffer();
-  float *input_ptr_;
-  float *scale_;
-  float *offset_;
-  float *output_ptr_;
+  float *input_ptr_ = nullptr;
+  float *scale_ = nullptr;
+  float *offset_ = nullptr;
+  float *output_ptr_ = nullptr;
   ScaleParameter *scale_param_;
 };
 }  // namespace mindspore::kernel


mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/batchnorm_fp32_tests.cc (+63 -0)

@@ -84,4 +84,67 @@ TEST_F(TestBatchnormFp32, BNTest) {
   output0_tensor.SetData(nullptr);
   MS_LOG(INFO) << "TestBathNormFp32 accuracy passed";
 }
+
+TEST_F(TestBatchnormFp32, FusedBNTest) {
+  std::vector<float> in_data = {-7.400094, 11.37495, 2.0271842,  5.5954003,  13.255154, 4.6289115,
+                                9.591311,  8.699771, -12.226144, -6.1819935, 6.957936,  -8.70818};
+  std::vector<float> scale = {13.323708, 14.0656395, 12.634319};
+  std::vector<float> offset = {27.888096, 24.533648, 15.335093};
+  std::vector<float> mean = {11.5127125, 0.47681615, 5.851508};
+  std::vector<float> var = {1.270583, 13.005714, 6.089223};
+  std::vector<lite::tensor::Tensor *> inputs_tensor;
+  std::vector<lite::tensor::Tensor *> outputs_tensor;
+
+  BatchNormParameter op_param;
+  op_param.op_parameter_.type_ = schema::PrimitiveType_BatchNorm;
+  op_param.epsilon_ = 0.001f;
+
+  std::vector<int> shape = {1, 2, 2, 3};
+  lite::tensor::Tensor input[5];
+  input[0].SetData(in_data.data());
+  input[1].SetData(scale.data());
+  input[2].SetData(offset.data());
+  input[3].SetData(mean.data());
+  input[4].SetData(var.data());
+
+  input[0].set_shape(shape);
+  for (int i = 1; i < 5; i++) {
+    input[i].set_shape({3});
+  }
+  for (int i = 0; i < 5; i++) {
+    inputs_tensor.push_back(&input[i]);
+  }
+
+  std::vector<float> output(12);
+  std::vector<float> corr_out = {-195.5765, 67.03745, -4.243883,  -42.028015, 74.37044, 9.075897,
+                                 5.1857452, 56.60399, -77.215096, -181.18402, 49.81066, -59.204563};
+
+  lite::tensor::Tensor output0_tensor;
+  outputs_tensor.push_back(&output0_tensor);
+  output0_tensor.SetData(output.data());
+  output0_tensor.set_shape(shape);
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, schema::PrimitiveType_FusedBatchNorm};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  lite::Context ctx;
+  ctx.thread_num_ = 1;
+  kernel::LiteKernel *kernel =
+      creator(inputs_tensor, outputs_tensor, reinterpret_cast<OpParameter *>(&op_param), &ctx, desc, nullptr);
+  ASSERT_NE(kernel, nullptr);
+  auto output_tensor_shape = output0_tensor.shape();
+  kernel->Run();
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < output0_tensor.ElementsNum(); i++) {
+    std::cout << output[i] << " ,";
+  }
+  std::cout << std::endl;
+  CompareOutputData(output.data(), corr_out.data(), output0_tensor.ElementsNum(), 0.001);
+
+  for (int i = 1; i < 5; i++) {
+    input[i].SetData(nullptr);
+  }
+  output0_tensor.SetData(nullptr);
+  MS_LOG(INFO) << "TestFusedBathNormFp32 accuracy passed";
+}
 }  // namespace mindspore
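Note: the expected values in corr_out follow the fused batch norm formula y = (x - mean) / sqrt(var + eps) * scale + offset applied per channel. A standalone check of the first element (not part of the test, verified by hand):

#include <cmath>
#include <cstdio>

int main() {
  // channel 0 of the test data above
  const float x = -7.400094f, mean = 11.5127125f, var = 1.270583f;
  const float scale = 13.323708f, offset = 27.888096f, eps = 0.001f;
  const float y = (x - mean) / std::sqrt(var + eps) * scale + offset;
  std::printf("%f\n", y);  // ~ -195.5765, matching corr_out[0]
  return 0;
}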
