Browse Source

!9512 [MSLITE] fp32 mirror pad optimize

From: @ling_qiao_min
Reviewed-by: 
Signed-off-by:
tags/v1.1.0
mindspore-ci-bot Gitee 5 years ago
parent
commit
88b81c053d
4 changed files with 176 additions and 6 deletions
  1. +13
    -0
      mindspore/lite/nnacl/common_func.h
  2. +6
    -0
      mindspore/lite/nnacl/pad_parameter.h
  3. +148
    -4
      mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
  4. +9
    -2
      mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h

+ 13
- 0
mindspore/lite/nnacl/common_func.h View File

@@ -44,6 +44,19 @@ static inline bool isMulOverflow(int32_t x, int32_t y) {
int32_t p = x * y;
return (x != 0) && (p / x != y);
}

static inline int GetStride(int *strides, const int *shape, int length) {
if (length <= 0) {
return 1;
}
int stride = 1;
for (int i = length - 1; i >= 0; --i) {
strides[i] = stride;
stride *= shape[i];
}
return stride;
}

#ifdef ENABLE_ARM64
void BiasAdd(const float *bias, float *data, size_t oc4, size_t plan_size);
void BiasAddRelu6(const float *bias, float *data, size_t oc4, size_t plan_size);


+ 6
- 0
mindspore/lite/nnacl/pad_parameter.h View File

@@ -37,4 +37,10 @@ typedef struct PadParameter {
PadQuantArg pad_quant_arg_;
} PadParameter;

/* Describes one region of the padded output that the fp32 pad kernel fills through MirrorPad().
 * The kernel walks a block with two nested loops (over size_[0] and size_[1]) and, for every
 * (a, b) pair, hands MirrorPad() the output index range
 *   [out_offset_ + a * out_stride_[0] + b * out_stride_[1],  ... + size_[2]). */
typedef struct MirrorPadBlock {
/* Flat output index of the first element of the block. */
int out_offset_;
/* Output stride, in elements, of each block dim; entries [0] and [1] scale the two loop counters. */
int out_stride_[3];
/* Extent of each block dim; size_[2] is the length of the index range passed to MirrorPad(). */
int size_[3];
} MirrorPadBlock;

#endif // MINDSPORE_LITE_NNACL_PAD_PARAMETER_H_

+ 148
- 4
mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc View File

@@ -15,12 +15,8 @@
*/

#include "src/runtime/kernel/arm/fp32/pad_fp32.h"
#include <string>
#include <cmath>
#include "src/kernel_registry.h"
#include "schema/model_generated.h"
#include "include/errorcode.h"
#include "nnacl/errorcode.h"
#include "src/runtime/runtime_api.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
@@ -73,6 +69,131 @@ int PadCPUKernel::ReSize() {
return RET_OK;
}

/*
 * Rebuilds mirror_pad_block_, the list of output regions that the fast mirror-pad path fills with MirrorPad().
 *
 * Outline:
 *   1. Collapse the dims read from in_/out_: every dim whose in_/out_ extents differ (a padded dim) is kept
 *      on its own, while runs of dims with equal extents are merged into a single "separated" dim.
 *   2. The last (at most three) separated dims become the block dims. Each block dim is split into a center
 *      part plus an optional left-pad part and an optional right-pad part. Separated dims in front of the
 *      block dims ("leading" dims) are enumerated one output coordinate at a time.
 *   3. One MirrorPadBlock is emitted for every (leading position, combination of parts), except for the
 *      combinations that lie completely inside the center of every dim: the caller copies that part with
 *      Pad() before it processes the blocks.
 *
 * If no dim is padded, no block is produced and mirror_pad_block_ stays empty.
 */
void PadCPUKernel::InitMirrorPadBlock() {
  mirror_pad_block_.clear();

  // MirrorPadBlock stores exactly three dims (size_[3] / out_stride_[3]).
  constexpr int kBlockDims = 3;

  auto input = in_tensors_.at(0);
  const size_t rank = input->shape().size();
  // NOTE(review): in_/out_ are declared as int[4] in the header, while this loop is bounded by the tensor
  // rank. Presumably in_/out_/paddings_ are aligned with the tensor dims here - confirm for rank != 4.

  /* 1. separated dims: merge adjacent unpadded dims, keep padded dims on their own */
  std::vector<int> in_dims;
  std::vector<int> out_dims;
  std::vector<int> left_pads;

  int merged_in = 1;
  int merged_out = 1;
  for (size_t i = 0; i < rank; ++i) {
    if (in_[i] != out_[i]) {
      // Flush the unpadded run collected so far before recording the padded dim.
      if (merged_in > 1) {
        in_dims.emplace_back(merged_in);
        out_dims.emplace_back(merged_out);
        left_pads.emplace_back(0);
      }
      in_dims.emplace_back(in_[i]);
      out_dims.emplace_back(out_[i]);
      left_pads.emplace_back(pad_param_->paddings_[2 * i]);
      merged_in = 1;
      merged_out = 1;
    } else {
      merged_in *= in_[i];
      merged_out *= out_[i];
    }
  }
  if (merged_in != 1 || merged_out != 1) {
    in_dims.emplace_back(merged_in);
    out_dims.emplace_back(merged_out);
    left_pads.emplace_back(0);
  }

  const size_t dim_count = out_dims.size();

  /* output strides of the separated dims */
  std::vector<int> out_strides(dim_count);
  GetStride(out_strides.data(), out_dims.data(), static_cast<int>(dim_count));

  /* leading dims: everything in front of the last kBlockDims separated dims */
  const size_t lead_count = dim_count > static_cast<size_t>(kBlockDims) ? dim_count - kBlockDims : 0;
  std::vector<int> lead_strides(lead_count);
  const int lead_size = GetStride(lead_strides.data(), out_dims.data(), static_cast<int>(lead_count));

  std::vector<int> right_pads(dim_count);
  for (size_t i = 0; i < dim_count; ++i) {
    right_pads[i] = out_dims[i] - in_dims[i] - left_pads[i];
  }

  /* 2. number of parts per block dim: center, plus left pad and/or right pad when present */
  std::vector<int> pad_region;
  for (size_t i = lead_count; i < dim_count; ++i) {
    int parts = 1;
    if (left_pads[i] > 0) {
      ++parts;
    }
    if (right_pads[i] > 0) {
      ++parts;
    }
    pad_region.emplace_back(parts);
  }
  const size_t block_dims = pad_region.size();

  std::vector<int> pad_region_stride(block_dims);
  const int region_size = GetStride(pad_region_stride.data(), pad_region.data(), static_cast<int>(block_dims));

  // Block dims are right-aligned inside MirrorPadBlock so that the innermost dim always lands in slot 2.
  const size_t slot_offset = static_cast<size_t>(kBlockDims) - block_dims;
  std::vector<int> pad_cord(block_dims);

  /* 3. emit the blocks */
  for (int pos = 0; pos < lead_size; ++pos) {
    // Turn the flat leading position into an output offset, and find out whether this leading slice lies
    // inside the center of every leading dim. lead_strides[] cannot be 0 here: a zero extent in a leading
    // dim makes lead_size 0 and the loop is never entered.
    int dst_basic_offset = 0;
    bool lead_in_center = true;
    int rest = pos;
    for (size_t i = 0; i < lead_count; ++i) {
      const int coord = rest / lead_strides[i];
      rest %= lead_strides[i];
      dst_basic_offset += coord * out_strides[i];
      if (coord < left_pads[i] || coord >= left_pads[i] + in_dims[i]) {
        lead_in_center = false;
      }
    }

    // Region index 0 is "center in every block dim". It may only be skipped when the leading slice is in
    // the center as well, because only that part has been written by Pad().
    for (int index = lead_in_center ? 1 : 0; index < region_size; ++index) {
      int value = index;
      for (size_t i = 0; i < block_dims; ++i) {
        pad_cord[i] = value / pad_region_stride[i];
        value %= pad_region_stride[i];
      }

      // Give unused leading slots extent 1 / stride 0 so the consumer's loops run exactly once over them
      // instead of reading indeterminate values when there are fewer than kBlockDims block dims.
      MirrorPadBlock block;
      block.out_offset_ = 0;
      for (int d = 0; d < kBlockDims; ++d) {
        block.size_[d] = 1;
        block.out_stride_[d] = 0;
      }

      int dst_offset = dst_basic_offset;
      for (size_t i = 0; i < block_dims; ++i) {
        const size_t di = slot_offset + i;
        const size_t si = lead_count + i;
        block.out_stride_[di] = out_strides[si];

        // Part coordinate: 0 = center; 1 = left pad if there is one, otherwise right pad; 2 = right pad.
        const bool is_center = pad_cord[i] == 0;
        const bool is_left = pad_cord[i] == 1 && left_pads[si] > 0;
        if (is_center) {
          dst_offset += left_pads[si] * out_strides[si];
          block.size_[di] = in_dims[si];
        } else if (is_left) {
          block.size_[di] = left_pads[si];
        } else {
          dst_offset += (left_pads[si] + in_dims[si]) * out_strides[si];
          block.size_[di] = right_pads[si];
        }
      }
      block.out_offset_ = dst_offset;
      mirror_pad_block_.push_back(block);
    }
  }
}

int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) {
if (shape == nullptr || ori_shape == nullptr) {
return RET_NULL_PTR;
@@ -139,6 +260,27 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) {
auto input_data = reinterpret_cast<float *>(input->MutableData());
auto output_data = reinterpret_cast<float *>(output->MutableData());

/* Fast Mirror pad */
if (mirror_pad_block_.size() != 0) {
/* copy center part */
Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_);

/* calculate region part */
for (size_t i = task_id; i < mirror_pad_block_.size(); i += context_->thread_num_) {
auto block = mirror_pad_block_[i];

for (int a = 0; a < block.size_[0]; a++) {
int out_a_index = block.out_offset_ + a * block.out_stride_[0];
for (int b = 0; b < block.size_[1]; b++) {
int output_index = out_a_index + b * block.out_stride_[1];
MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[2]);
}
}
}
return RET_OK;
}

/* Common Mirror pad */
int unit = UP_DIV(output->ElementsNum(), context_->thread_num_);
int begin = unit * task_id;
int end = MSMIN(begin + unit, output->ElementsNum());
@@ -235,6 +377,8 @@ int PadCPUKernel::HandleMirrorPad() {
}
CalculateStrides();
pad_param_->mirror_offset_ = pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_REFLECT) ? 1 : 0;

InitMirrorPadBlock();
return RET_OK;
}



+ 9
- 2
mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h View File

@@ -17,9 +17,14 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_PAD_H_

#include <vector>
#include "src/lite_kernel.h"

#include <string>
#include <cmath>
#include <utility>
#include "include/errorcode.h"
#include "nnacl/fp32/pad_fp32.h"
#include "nnacl/errorcode.h"
#include "nnacl/common_func.h"
#include "src/lite_kernel.h"
#include "src/runtime/kernel/arm/base/layout_transform.h"

namespace mindspore::kernel {
@@ -46,12 +51,14 @@ class PadCPUKernel : public LiteKernel {
void CalculateStrides();
int ExtendShape(int *shape, int length, const int *ori_shape, int rank);
int ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length);
void InitMirrorPadBlock();

protected:
int HandleMirrorPad();
PadParameter *pad_param_ = nullptr;
int in_[4] = {0};
int out_[4] = {0};
std::vector<MirrorPadBlock> mirror_pad_block_;
};

int PadImpl(void *cdata, int task_id);


Loading…
Cancel
Save