Browse Source

!15096 [MS][LITE][Develop] optimize fp16 padding

From: @lx0095
Reviewed-by: @hangangqiang,@zhanghaibo5
Signed-off-by: @zhanghaibo5
pull/15096/MERGE
mindspore-ci-bot Gitee 4 years ago
parent
commit
c6a64d34e7
1 changed file with 28 additions and 0 deletions
  1. +28
    -0
      mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc

+ 28
- 0
mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc View File

@@ -35,6 +35,34 @@ int PadFp16CPUKernel::RunImpl(int task_id) {
} }


int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) {
auto input = in_tensors_.at(0);
auto output = out_tensors_.at(0);
auto input_data = reinterpret_cast<float16_t *>(input->data_c());
auto output_data = reinterpret_cast<float16_t *>(output->data_c());

/* Fast Mirror pad */
if (mirror_pad_block_.size() != 0) {
/* copy center part */
PadFp16(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_);

/* calculate region part */
for (size_t i = task_id; i < mirror_pad_block_.size(); i += context_->thread_num_) {
auto block = mirror_pad_block_[i];

for (int a = 0; a < block.size_[0]; a++) {
int out_a_index = block.out_offset_ + a * block.out_stride_[0];
for (int b = 0; b < block.size_[1]; b++) {
int out_b_index = out_a_index + b * block.out_stride_[1];
for (int c = 0; c < block.size_[2]; ++c) {
int output_index = out_b_index + c * block.out_stride_[2];
MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]);
}
}
}
}
return RET_OK;
}

int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_); int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_);
int begin = unit * task_id; int begin = unit * task_id;
int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum()); int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum());


Loading…
Cancel
Save