diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc index 3da2a1ed7a..35eb07cc0b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc @@ -35,6 +35,34 @@ int PadFp16CPUKernel::RunImpl(int task_id) { } int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { + auto input = in_tensors_.at(0); + auto output = out_tensors_.at(0); + auto input_data = reinterpret_cast(input->data_c()); + auto output_data = reinterpret_cast(output->data_c()); + + /* Fast Mirror pad */ + if (mirror_pad_block_.size() != 0) { + /* copy center part */ + PadFp16(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_); + + /* calculate region part */ + for (size_t i = task_id; i < mirror_pad_block_.size(); i += context_->thread_num_) { + auto block = mirror_pad_block_[i]; + + for (int a = 0; a < block.size_[0]; a++) { + int out_a_index = block.out_offset_ + a * block.out_stride_[0]; + for (int b = 0; b < block.size_[1]; b++) { + int out_b_index = out_a_index + b * block.out_stride_[1]; + for (int c = 0; c < block.size_[2]; ++c) { + int output_index = out_b_index + c * block.out_stride_[2]; + MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); + } + } + } + } + return RET_OK; + } + int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_); int begin = unit * task_id; int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum());