Merge pull request !5890 from sunsuodong/pad_fp16
nnacl/fp16/pad_fp16.c (new file)
@@ -0,0 +1,35 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <string.h>  // memcpy
#include "nnacl/fp16/pad_fp16.h"
#include "nnacl/common_func.h"

void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
             const int *paddings, const int tid, const int thread_num) {
  int in[4], out[4];
  for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
    out[0] = in[0] + paddings[0];
    for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) {
      out[1] = in[1] + paddings[2];
      for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
        out[2] = in[2] + paddings[4];
        float16_t *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
        const float16_t *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
        memcpy(dst, src, input_shape[3] * sizeof(float16_t));
      }
    }
  }
}
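Note: `offset` is declared in `nnacl/common_func.h` and is not part of this PR. A minimal sketch of the indexing the loop above relies on, assuming the usual row-major NHWC flattening (the shipped helper may differ in exact signature):

// Sketch, not the shipped helper: flattens an NHWC coordinate into a linear
// index, so dst/src above each point at a contiguous run of input_shape[3]
// channel values that a single memcpy can copy.
static inline int offset(const int *shape, int n, int h, int w, int c) {
  return ((n * shape[1] + h) * shape[2] + w) * shape[3] + c;
}
// paddings holds two entries per dimension, {before, after}, in the order
// {N, N, H, H, W, W, C, C}; that is why the loops read paddings[0],
// paddings[2], paddings[4], and the channel offset uses paddings[6].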
nnacl/fp16/pad_fp16.h (new file)
@@ -0,0 +1,32 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_
#define MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_

#ifdef ENABLE_NEON
#include <arm_neon.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif
void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
             const int *paddings, const int tid, const int thread_num);
#ifdef __cplusplus
}
#endif

#endif  // MINDSPORE_LITE_NNACL_FP16_PAD_FP16_H_
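A hypothetical standalone call, for illustration only (assumes an ARM toolchain with ENABLE_NEON defined so `float16_t` is available): the caller zero-fills the output first, since `PadFp16` writes only the interior region, then pads a 1x2x2x1 input to 1x4x4x1 with one cell of zeros on each spatial side.

#include <string.h>
#include "nnacl/fp16/pad_fp16.h"

int main() {
  const int input_shape[4] = {1, 2, 2, 1};
  const int output_shape[4] = {1, 4, 4, 1};
  // {before, after} pairs for N, H, W, C.
  const int paddings[8] = {0, 0, 1, 1, 1, 1, 0, 0};
  float16_t input[4] = {1, 2, 3, 4};
  float16_t output[16];
  memset(output, 0, sizeof(output));  // pad value: zero
  // tid = 0, thread_num = 1: single-threaded, covers every H row.
  PadFp16(input, output, input_shape, output_shape, paddings, 0, 1);
  return 0;
}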
src/runtime/kernel/arm/fp16/pad_fp16.cc (new file)
@@ -0,0 +1,101 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/runtime/kernel/arm/fp16/pad_fp16.h"
#include "src/runtime/kernel/arm/fp16/common_fp16.h"
#include "nnacl/fp16/cast_fp16.h"
#include "src/kernel_registry.h"
#include "src/runtime/runtime_api.h"

using mindspore::kernel::KERNEL_ARCH::kCPU;
using mindspore::lite::KernelRegistrar;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Pad;

namespace mindspore::kernel {
int PadFp16CPUKernel::RunImpl(int task_id) {
  // Operate on the fp16 buffers prepared in Run(); input_/output_ already
  // point at the tensors' own data when no fp32<->fp16 conversion was needed.
  PadFp16(input_, output_, in_, out_, pad_param_->paddings_, task_id, context_->thread_num_);
  return RET_OK;
}

int PadFp16CPUKernel::Run() {
  auto ret = Prepare();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare failed, ret: " << ret;
    return ret;
  }
  auto input_tensor = in_tensors_.at(0);
  auto output_tensor = out_tensors_.at(0);
  is_input_fp32_ = input_tensor->data_type() == kNumberTypeFloat32;
  is_output_fp32_ = output_tensor->data_type() == kNumberTypeFloat32;

  input_ = ConvertInputFp32toFp16(input_tensor, context_);
  output_ = MallocOutputFp16(output_tensor, context_);
  if (input_ == nullptr || output_ == nullptr) {
    FreeInputAndOutput();
    MS_LOG(ERROR) << "input or output is nullptr";
    return RET_ERROR;
  }
  // Pre-fill with the pad value (zero); PadFp16 only copies the interior.
  memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t));

  ret = ParallelLaunch(THREAD_POOL_DEFAULT, PadImpl, this, op_parameter_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PadFp16 run error, error_code[" << ret << "]";
  }
  if (is_output_fp32_) {
    Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->MutableData()), output_tensor->ElementsNum());
  }
  FreeInputAndOutput();
  return ret;
}

void PadFp16CPUKernel::FreeInputAndOutput() {
  if (is_input_fp32_) {
    context_->allocator->Free(input_);
    input_ = nullptr;
  }
  if (is_output_fp32_) {
    context_->allocator->Free(output_);
    output_ = nullptr;
  }
}

kernel::LiteKernel *CpuPadFp16KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                            const std::vector<lite::Tensor *> &outputs,
                                            OpParameter *opParameter, const lite::Context *ctx,
                                            const kernel::KernelKey &desc,
                                            const mindspore::lite::PrimitiveC *primitive) {
  auto *kernel = new (std::nothrow) PadFp16CPUKernel(opParameter, inputs, outputs, ctx, primitive);
  if (kernel == nullptr) {
    MS_LOG(ERROR) << "new PadFp16CPUKernel failed!";
    return nullptr;
  }
  auto ret = kernel->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: "
                  << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_));
    delete kernel;
    return nullptr;
  }
  return kernel;
}

REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Pad, CpuPadFp16KernelCreator)
}  // namespace mindspore::kernel
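`ConvertInputFp32toFp16` and `MallocOutputFp16` come from `common_fp16.h` and are not part of this PR. A plausible sketch of the input-side contract, assuming it reuses the tensor's buffer when the data is already fp16 and otherwise casts into a scratch buffer from the context allocator, which would explain why `FreeInputAndOutput` frees `input_` only when `is_input_fp32_` is true:

// Sketch under the assumptions above, not the shipped implementation.
float16_t *ConvertInputFp32toFp16(lite::Tensor *input, const lite::Context *ctx) {
  if (input->data_type() == kNumberTypeFloat16) {
    // Already fp16: hand back the tensor's own buffer; nothing to free later.
    return reinterpret_cast<float16_t *>(input->MutableData());
  }
  // fp32 input: allocate a temporary fp16 copy through the context allocator.
  auto *fp16_data = reinterpret_cast<float16_t *>(
      ctx->allocator->Malloc(input->ElementsNum() * sizeof(float16_t)));
  if (fp16_data == nullptr) {
    return nullptr;
  }
  // Assumed counterpart of the Float16ToFloat32 used above (cast_fp16.h).
  Float32ToFloat16(reinterpret_cast<float *>(input->MutableData()), fp16_data,
                   input->ElementsNum());
  return fp16_data;
}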
src/runtime/kernel/arm/fp16/pad_fp16.h (new file)
@@ -0,0 +1,45 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_

#include <vector>
#include "src/runtime/kernel/arm/fp32/pad.h"
#include "nnacl/fp16/pad_fp16.h"

namespace mindspore::kernel {
class PadFp16CPUKernel : public PadCPUKernel {
 public:
  PadFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                   const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                   const mindspore::lite::PrimitiveC *primitive)
      : PadCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
  ~PadFp16CPUKernel() {}

  int Run() override;
  int RunImpl(int task_id) override;

 private:
  void FreeInputAndOutput();

  bool is_input_fp32_ = false;
  bool is_output_fp32_ = false;
  float16_t *input_ = nullptr;
  float16_t *output_ = nullptr;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_PAD_FP16_H_
src/runtime/kernel/arm/fp32/pad.h
@@ -28,7 +28,7 @@ class PadCPUKernel : public LiteKernel {
   PadCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                const std::vector<lite::Tensor *> &outputs, const lite::Context *ctx,
                const mindspore::lite::PrimitiveC *primitive)
-      : LiteKernel(parameter, inputs, outputs, ctx, primitive), context_(ctx) {
+      : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
     pad_param_ = reinterpret_cast<PadParameter *>(parameter);
   }
@@ -37,14 +37,15 @@ class PadCPUKernel : public LiteKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int RunImpl(int task_id);
+  virtual int RunImpl(int task_id);

- private:
-  const lite::Context *context_;
+ protected:
   const PadParameter *pad_param_;
   int in_[4] = {1, 1, 1, 1};
   int out_[4] = {1, 1, 1, 1};
 };
+int PadImpl(void *cdata, int task_id);
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_PAD_H_
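`PadImpl` is the `ParallelLaunch` callback shared by the fp32 and fp16 `Run` paths; making `RunImpl` virtual in this hunk is what lets the same callback dispatch to the fp16 override. Its definition lives in the fp32 `pad.cc` (not shown in this PR) and is presumably a thin trampoline along these lines (sketch; exact error logging may differ):

int PadImpl(void *cdata, int task_id) {
  auto *kernel = reinterpret_cast<PadCPUKernel *>(cdata);
  // Virtual call: runs PadFp16CPUKernel::RunImpl for the fp16 kernel.
  auto error_code = kernel->RunImpl(task_id);
  if (error_code != RET_OK) {
    return RET_ERROR;
  }
  return RET_OK;
}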