| @@ -35,6 +35,34 @@ int PadFp16CPUKernel::RunImpl(int task_id) { | |||||
| } | } | ||||
| int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { | int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { | ||||
| auto input = in_tensors_.at(0); | |||||
| auto output = out_tensors_.at(0); | |||||
| auto input_data = reinterpret_cast<float16_t *>(input->data_c()); | |||||
| auto output_data = reinterpret_cast<float16_t *>(output->data_c()); | |||||
| /* Fast Mirror pad */ | |||||
| if (mirror_pad_block_.size() != 0) { | |||||
| /* copy center part */ | |||||
| PadFp16(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_); | |||||
| /* calculate region part */ | |||||
| for (size_t i = task_id; i < mirror_pad_block_.size(); i += context_->thread_num_) { | |||||
| auto block = mirror_pad_block_[i]; | |||||
| for (int a = 0; a < block.size_[0]; a++) { | |||||
| int out_a_index = block.out_offset_ + a * block.out_stride_[0]; | |||||
| for (int b = 0; b < block.size_[1]; b++) { | |||||
| int out_b_index = out_a_index + b * block.out_stride_[1]; | |||||
| for (int c = 0; c < block.size_[2]; ++c) { | |||||
| int output_index = out_b_index + c * block.out_stride_[2]; | |||||
| MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_); | int unit = UP_DIV(out_tensors_.at(0)->ElementsNum(), context_->thread_num_); | ||||
| int begin = unit * task_id; | int begin = unit * task_id; | ||||
| int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum()); | int end = MSMIN(begin + unit, out_tensors_.at(0)->ElementsNum()); | ||||