From 6e022a0064068358842aeb6969ab4309e7eab86e Mon Sep 17 00:00:00 2001 From: chenjianping Date: Fri, 14 Aug 2020 17:27:19 +0800 Subject: [PATCH] crop support parallel --- .../lite/src/runtime/kernel/arm/fp32/crop.cc | 3 +- .../runtime/kernel/arm/nnacl/crop_parameter.h | 1 - .../src/runtime/kernel/arm/nnacl/fp32/crop.c | 4 +- .../src/runtime/kernel/arm/nnacl/fp32/crop.h | 3 +- .../runtime/kernel/arm/fp32/crop_fp32_test.cc | 126 +++++++++++++++--- 5 files changed, 113 insertions(+), 24 deletions(-) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc index 57214ad08a..d0e2e6a5a3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop.cc @@ -48,8 +48,7 @@ int CropCPUKernel::CropParallelRun(int thread_id) { float *input_data = reinterpret_cast(input->Data()); float *output_data = reinterpret_cast(output->Data()); auto param = reinterpret_cast(op_parameter_); - param->thread_id_ = thread_id; - Crop4D(input_data, output_data, input->shape().data(), output->shape().data(), param); + Crop4D(input_data, output_data, input->shape().data(), output->shape().data(), param, thread_id); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/crop_parameter.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/crop_parameter.h index 847441dd86..21e6497800 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/crop_parameter.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/crop_parameter.h @@ -26,7 +26,6 @@ typedef struct CropParameter { OpParameter op_parameter_; CropQuantArg quant_arg; int thread_count_; - int thread_id_; int offset_size_; int64_t offset_[CROP_OFFSET_MAX_SIZE]; int64_t in_offset_[CROP_OFFSET_MAX_SIZE]; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c index 10d3393d7d..304dffd45f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.c @@ -30,7 +30,8 @@ void Pad4DOffset(CropParameter *crop_param, int64_t *offset) { } } -void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param) { +void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, + int thread_id) { int64_t offset_pad[DIMENSION_4D]; Pad4DOffset(crop_param, offset_pad); int out_shape1 = out_shape[1]; @@ -44,7 +45,6 @@ void Crop4D(const float *input, float *output, const int *in_shape, const int *o size_t in_stride0 = in_stride1 * in_shape[1]; size_t copy_size = out_shape3 * sizeof(float); size_t count_per_thread = UP_DIV(out_shape1, crop_param->op_parameter_.thread_num_); - int thread_id = crop_param->thread_id_; size_t thread_stride = thread_id * count_per_thread; for (int i = 0; i < out_shape[0]; ++i) { size_t out_offset0 = i * out_stride0; diff --git a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h index 546fe1e5de..fb616a0a1d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h +++ b/mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h @@ -23,7 +23,8 @@ #ifdef __cplusplus extern "C" { #endif -void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param); +void Crop4D(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param, + int thread_id); void Crop4DNoParallel(const float *input, float *output, const int *in_shape, const int *out_shape, CropParameter *crop_param); #ifdef __cplusplus diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc index 1a64bb0833..da4f4ba41b 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/crop_fp32_test.cc @@ -15,6 +15,7 @@ */ #include "common/common_test.h" #include "mindspore/lite/src/runtime/kernel/arm/nnacl/fp32/crop.h" +#include "mindspore/lite/src/runtime/kernel/arm/fp32/crop.h" namespace mindspore { class CropTestFp32 : public mindspore::CommonTest { @@ -36,8 +37,7 @@ TEST_F(CropTestFp32, CropTest1) { crop_param.offset_[1] = 1; crop_param.offset_[2] = 1; crop_param.op_parameter_.thread_num_ = 1; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; } @@ -60,8 +60,7 @@ TEST_F(CropTestFp32, CropTest2) { crop_param.offset_[2] = 0; crop_param.offset_[3] = 0; crop_param.op_parameter_.thread_num_ = 1; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; } @@ -81,8 +80,7 @@ TEST_F(CropTestFp32, CropTest3) { crop_param.axis_ = 3; crop_param.offset_[0] = 1; crop_param.op_parameter_.thread_num_ = 1; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; } @@ -102,10 +100,8 @@ TEST_F(CropTestFp32, CropTest4) { crop_param.axis_ = 3; crop_param.offset_[0] = 1; crop_param.op_parameter_.thread_num_ = 2; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); - crop_param.thread_id_ = 1; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); + Crop4D(input, output, in_shape, out_shape, &crop_param, 1); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; } @@ -191,10 +187,8 @@ TEST_F(CropTestFp32, CropTest8) { crop_param.offset_[1] = 1; crop_param.offset_[2] = 1; crop_param.op_parameter_.thread_num_ = 2; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); - crop_param.thread_id_ = 1; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); + Crop4D(input, output, in_shape, out_shape, &crop_param, 1); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; } @@ -219,10 +213,106 @@ TEST_F(CropTestFp32, CropTest9) { crop_param.offset_[1] = 1; crop_param.offset_[2] = 1; crop_param.op_parameter_.thread_num_ = 2; - crop_param.thread_id_ = 0; - Crop4D(input, output, in_shape, out_shape, &crop_param); - crop_param.thread_id_ = 1; - Crop4D(input, output, in_shape, out_shape, &crop_param); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); + Crop4D(input, output, in_shape, out_shape, &crop_param, 1); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest10) { + float input[50] = {1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50}; + const int kOutSize = 8; + float expect_out[kOutSize] = {1, 2, + 6, 7, + 26, 27, + 31, 32}; + + float output[kOutSize]; + int in_shape[4] = {1, 2, 5, 5}; + int out_shape[4] = {1, 2, 2, 2}; + CropParameter crop_param; + crop_param.axis_ = 2; + crop_param.offset_[0] = 0; + crop_param.offset_[1] = 0; + crop_param.op_parameter_.thread_num_ = 2; + Crop4D(input, output, in_shape, out_shape, &crop_param, 1); + Crop4D(input, output, in_shape, out_shape, &crop_param, 0); + for (int i = 0; i < kOutSize; ++i) { + std::cout << output[i] << " "; + } + std::cout << "\n"; + CompareOutputData(output, expect_out, kOutSize, 0.000001); +} + +TEST_F(CropTestFp32, CropTest11) { + float input[100] = {1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50, + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50}; + + const int kOutSize = 16; + float expect_out[kOutSize] = {1, 2, + 6, 7, + 26, 27, + 31, 32, + 1, 2, + 6, 7, + 26, 27, + 31, 32}; + std::vector in_shape = {1, 4, 5, 5}; + std::vector out_shape = {1, 4, 2, 2}; + std::vector inputs; + std::vector outputs; + auto in_t = + new lite::tensor::Tensor(kNumberTypeFloat, in_shape, schema::Format_NHWC, static_cast(1)); + in_t->MallocData(); + memcpy(in_t->Data(), input, sizeof(float) * in_t->ElementsNum()); + inputs.push_back(in_t); + + auto out_t = + new lite::tensor::Tensor(kNumberTypeFloat, out_shape, schema::Format_NHWC, static_cast(1)); + out_t->MallocData(); + outputs.push_back(out_t); + + auto ctx = new (std::nothrow) lite::Context; + ctx->thread_num_ = 2; + CropParameter crop_param; + crop_param.axis_ = 2; + crop_param.offset_[0] = 0; + crop_param.offset_[1] = 0; + auto kernel = new kernel::CropCPUKernel(reinterpret_cast(&crop_param), inputs, outputs, ctx, nullptr); + kernel->Init(); + kernel->Run(); + + float *output = reinterpret_cast(outputs[0]->Data()); for (int i = 0; i < kOutSize; ++i) { std::cout << output[i] << " "; }