From: @ling_qiao_min
Reviewed-by: @zhang_xue_tong
Signed-off-by: @zhang_xue_tong
Tag: tags/v1.2.0-rc1
@@ -36,13 +36,9 @@ class ConcatFp16CPUKernel : public LiteKernel {
       : LiteKernel(parameter, inputs, outputs, ctx, primitive) {
     concat_param_ = reinterpret_cast<ConcatParameter *>(op_parameter_);
   }
   ~ConcatFp16CPUKernel() = default;
   int Init() override;
   int ReSize() override;
   int Run() override;
  private:
@@ -207,18 +207,12 @@ static int Convolution1x1Fp16RunHw(void *cdata, int task_id) {
 }
 int Convolution1x1FP16CPUKernel::Run() {
-  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get executor tensor failed.";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   pack_input_ = reinterpret_cast<float16_t *>(
     ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t)));
   if (pack_input_ == nullptr) {
     MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     return RET_MEMORY_FAILED;
   }
@@ -232,6 +226,7 @@ int Convolution1x1FP16CPUKernel::Run() {
       input_ptr_ = batch_in;
     }
+    int ret = RET_ERROR;
     if (multi_thread_by_hw_) {
       ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
     } else {
@@ -240,16 +235,12 @@ int Convolution1x1FP16CPUKernel::Run() {
     }
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "ParallelLaunch failed.";
-      ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
       ctx_->allocator->Free(pack_input_);
       pack_input_ = nullptr;
       return ret;
     }
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   ctx_->allocator->Free(pack_input_);
   pack_input_ = nullptr;
   return RET_OK;
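Note: `ret` used to be seeded by the checked `GetExecuteTensor()` call at the top of `Run()`; with that check gone, the declaration moves next to the launch branch, so the explicit `RET_ERROR` initializer is what guarantees a defined value when neither branch assigns it. A condensed sketch of the resulting branch, using only names that appear in the hunks above (the surrounding per-batch loop is assumed and not shown in this diff):

```cpp
// Sketch: ret must start as RET_ERROR because it is only assigned by whichever
// ParallelLaunch branch runs; an uninitialized ret would make the check below undefined.
int ret = RET_ERROR;
if (multi_thread_by_hw_) {
  ret = ParallelLaunch(this->context_->thread_pool_, Convolution1x1Fp16RunHw, this, thread_count_);
} else {
  // output-channel-parallel path, not visible in this hunk
}
if (ret != RET_OK) {
  MS_LOG(ERROR) << "ParallelLaunch failed.";
  ctx_->allocator->Free(pack_input_);
  pack_input_ = nullptr;
  return ret;
}
```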
@@ -33,19 +33,10 @@ ConvolutionBaseFP16CPUKernel::~ConvolutionBaseFP16CPUKernel() {
 }
 int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
-  // ===================input====================//
-  auto input_tensor = in_tensors_.at(kInputIndex);
-  in_data_type_ = input_tensor->data_type();
-  MS_ASSERT(in_data_type_ == kNumberTypeFloat32 || in_data_type_ == kNumberTypeFloat16);
-  execute_input_ = ConvertInputFp32toFp16(input_tensor, context_);
-  // ==================output====================//
-  auto out_tensor = out_tensors_.at(kOutputIndex);
-  out_data_type_ = out_tensor->data_type();
-  MS_ASSERT(out_data_type_ == kNumberTypeFloat32 || out_data_type_ == kNumberTypeFloat16);
-  execute_output_ = MallocOutputFp16(out_tensor, context_);
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  execute_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
+  execute_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
   return RET_OK;
 }
@@ -78,25 +69,4 @@ int ConvolutionBaseFP16CPUKernel::GetExecuteFilter() {
   }
   return RET_OK;
 }
-void ConvolutionBaseFP16CPUKernel::IfCastOutput() {
-  if (out_data_type_ == kNumberTypeFloat32) {
-    auto out_tensor = out_tensors_.at(kOutputIndex);
-    auto out_ele_num = out_tensor->ElementsNum();
-    auto output_addr = reinterpret_cast<float *>(out_tensor->MutableData());
-    Float16ToFloat32(execute_output_, output_addr, out_ele_num);
-  }
-}
-void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() {
-  if (in_data_type_ == kNumberTypeFloat32) {
-    context_->allocator->Free(execute_input_);
-    execute_input_ = nullptr;
-  }
-  if (out_data_type_ == kNumberTypeFloat32) {
-    context_->allocator->Free(execute_output_);
-    execute_output_ = nullptr;
-  }
-}
 } // namespace mindspore::kernel
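For context: after this hunk `GetExecuteTensor()` no longer converts fp32 inputs to fp16 or allocates a temporary fp16 output; it only caches the tensors' existing fp16 buffers. That is why `IfCastOutput()`, the base-class `FreeTmpBuffer()`, and the `in_data_type_`/`out_data_type_` bookkeeping can be deleted. Condensed view of the method as it stands after the patch (this is the added code above shown in one piece; it assumes the caller already provides fp16 tensors):

```cpp
// GetExecuteTensor() after this patch: in/out tensors are assumed to already hold
// float16 data, so the kernel only caches pointers and never owns these buffers.
int ConvolutionBaseFP16CPUKernel::GetExecuteTensor() {
  auto input_tensor = in_tensors_.at(0);
  auto output_tensor = out_tensors_.at(0);
  execute_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
  execute_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
  return RET_OK;
}
```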
@@ -38,16 +38,12 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
   int RunImpl(int task_id) { return mindspore::lite::RET_OK; }
   virtual int GetExecuteTensor();
   virtual int GetExecuteFilter();
-  virtual void IfCastOutput();
-  void FreeTmpBuffer();
  protected:
   float16_t *fp16_weight_ = nullptr;
   float16_t *execute_input_ = nullptr;
   float16_t *execute_weight_ = nullptr;
   float16_t *execute_output_ = nullptr;
-  TypeId in_data_type_;
-  TypeId out_data_type_;
 };
 } // namespace mindspore::kernel
@@ -114,19 +114,13 @@ static int ConvDwFp16Run(void *cdata, int task_id) {
 }
 int ConvolutionDepthwiseFp16CPUKernel::Run() {
-  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get Execute tensor failed.";
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   return ret;
 }
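The depthwise kernel shows the minimal form of the new calling convention: cache the fp16 pointers, launch, return. Condensed from the hunk above (no fp32/fp16 cast-back, no base-class temporary buffers to free):

```cpp
// ConvolutionDepthwiseFp16CPUKernel::Run() after this patch, condensed.
int ConvolutionDepthwiseFp16CPUKernel::Run() {
  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
  auto ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]";
  }
  return ret;
}
```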
@@ -149,13 +149,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     return ret;
   }
-  ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get Execute tensor failed.";
-    FreePackedInputOutput();
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (need_align_) {
     PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_,
                         conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
@@ -172,8 +167,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   FreePackedInputOutput();
   return ret;
 }
@@ -128,17 +128,11 @@ static int ConvolutionFp16Impl(void *cdata, int task_id) {
 }
 int ConvolutionFP16CPUKernel::Run() {
-  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get Execute tensor failed.";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  ret = InitTmpBuffer();
+  auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeTmpBuffer();
     return RET_ERROR;
   }
@@ -147,8 +141,7 @@ int ConvolutionFP16CPUKernel::Run() {
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]";
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   FreeTmpBuffer();
   return ret;
 }
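Same simplification for the standard convolution kernel (and the Winograd kernel below): since `GetExecuteTensor()` is no longer the first checked call, `ret` is now introduced by `InitTmpBuffer()`. Condensed prologue as it reads after this hunk:

```cpp
// New Run() prologue shared by ConvolutionFP16CPUKernel and ConvolutionWinogradFP16CPUKernel.
ConvolutionBaseFP16CPUKernel::GetExecuteTensor();  // only caches fp16 pointers now
auto ret = InitTmpBuffer();                        // first call whose status is checked
if (ret != RET_OK) {
  MS_LOG(ERROR) << "Init tmp buffer failed.";
  FreeTmpBuffer();  // the kernel's own buffers; the removed base-class helper is gone
  return RET_ERROR;
}
```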
@@ -195,17 +195,11 @@ static int ConvolutionWinogradFp16Impl(void *cdata, int task_id) {
 }
 int ConvolutionWinogradFP16CPUKernel::Run() {
-  auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get Execute tensor failed.";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  ret = InitTmpBuffer();
+  auto ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeTmpBuffer();
     return RET_ERROR;
   }
@@ -215,8 +209,6 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
     MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]";
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   FreeTmpBuffer();
   return ret;
 }
@@ -162,13 +162,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     return RET_ERROR;
   }
-  ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Get Execute tensor failed.";
-    FreePackedInputOutput();
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
-    return ret;
-  }
+  ConvolutionBaseFP16CPUKernel::GetExecuteTensor();
   if (need_align_) {
     PackNHWCToNHWC8Fp16(execute_input_, packed_input_, conv_param_->input_batch_,
                         conv_param_->input_h_ * conv_param_->input_w_, conv_param_->input_channel_);
@@ -189,8 +184,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_,
                         conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_);
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   FreePackedInputOutput();
   return ret;
 }
@@ -189,7 +189,6 @@ int DeConvolutionFp16CPUKernel::Run() {
   int error_code = InitRunBuf();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "deconv fp16 InitRunBuf error! error_code[" << error_code << "]";
-    ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
     FreeRunBuf();
     return RET_ERROR;
   }
@@ -206,8 +205,6 @@ int DeConvolutionFp16CPUKernel::Run() {
     }
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   FreeRunBuf();
   return error_code;
 }
@@ -405,9 +405,6 @@ int DeConvWinogradFp16CPUKernel::Run() {
     ParallelLaunch(this->context_->thread_pool_, DeConvWgPostFp16Run, this, thread_num_hw_);
   }
-  ConvolutionBaseFP16CPUKernel::IfCastOutput();
-  ConvolutionBaseFP16CPUKernel::FreeTmpBuffer();
   return RET_OK;
 }
 } // namespace mindspore::kernel
@@ -33,9 +33,6 @@ using mindspore::schema::PrimitiveType_Scale;
 namespace mindspore::kernel {
 int ScaleFp16CPUKernel::InitScaleOffset() {
-  auto input_tensor = in_tensors_.at(0);
-  malloc_input_ = input_tensor->data_type() == kNumberTypeFloat32;
   auto scale_tensor = in_tensors_.at(1);
   malloc_scale_ = scale_tensor->data_type() == kNumberTypeFloat32;
@@ -45,9 +42,6 @@ int ScaleFp16CPUKernel::InitScaleOffset() {
     auto offset_tensor = in_tensors_.at(2);
     malloc_offset_ = offset_tensor->data_type() == kNumberTypeFloat32;
   }
-  auto output_tensor = out_tensors_.at(0);
-  malloc_output_ = output_tensor->data_type() == kNumberTypeFloat32;
   return RET_OK;
 }
@@ -103,6 +97,11 @@ int ScaleFp16Run(void *cdata, int task_id) {
 }
 int ScaleFp16CPUKernel::Run() {
+  auto input_tensor = in_tensors_.at(0);
+  auto output_tensor = out_tensors_.at(0);
+  input_ = reinterpret_cast<float16_t *>(input_tensor->MutableData());
+  output_ = reinterpret_cast<float16_t *>(output_tensor->MutableData());
   auto ret = InitScaleOffset();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
@@ -123,20 +122,11 @@ int ScaleFp16CPUKernel::Run() {
     return RET_ERROR;
   }
-  // if output tensor is fp32, we need to transform
-  if (malloc_output_) {
-    auto out_tensor = out_tensors_.at(0);
-    Float16ToFloat32(output_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
-  }
   FreeTmpBuffer();
   return RET_OK;
 }
 int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
-  input_ = ConvertInputFp32toFp16(in_tensors_.at(0), context_);
-  if (input_ == nullptr) {
-    return RET_ERROR;
-  }
   scale_ = ConvertInputFp32toFp16(in_tensors_.at(1), context_);
   if (scale_ == nullptr) {
     return RET_ERROR;
@@ -155,18 +145,10 @@ int ScaleFp16CPUKernel::MallocAssignTmpBuffer() {
     }
     memset(offset_, 0, in_tensors_.at(1)->ElementsNum() * sizeof(float16_t));
   }
-  output_ = MallocOutputFp16(out_tensors_.at(0), context_);
-  if (output_ == nullptr) {
-    return RET_ERROR;
-  }
   return RET_OK;
 }
 void ScaleFp16CPUKernel::FreeTmpBuffer() {
-  if (malloc_input_ && input_ != nullptr) {
-    context_->allocator->Free(input_);
-    input_ = nullptr;
-  }
   if (malloc_scale_ && scale_ != nullptr) {
     context_->allocator->Free(scale_);
     scale_ = nullptr;
   }
@@ -175,10 +157,6 @@ void ScaleFp16CPUKernel::FreeTmpBuffer() {
     context_->allocator->Free(offset_);
     offset_ = nullptr;
   }
-  if (malloc_output_ && output_ != nullptr) {
-    context_->allocator->Free(output_);
-    output_ = nullptr;
-  }
 }
 REG_KERNEL(kCPU, kNumberTypeFloat16, PrimitiveType_Scale, LiteKernelCreator<ScaleFp16CPUKernel>)
@@ -43,10 +43,8 @@ class ScaleFp16CPUKernel : public ScaleCPUKernel {
   void FreeTmpBuffer();
  private:
-  bool malloc_input_ = false;
   bool malloc_scale_ = false;
   bool malloc_offset_ = false;
-  bool malloc_output_ = false;
   float16_t *input_ = nullptr;
   float16_t *scale_ = nullptr;
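The Scale kernel mirrors the convolution change: `input_` and `output_` now point straight at the tensors' fp16 data at the top of `Run()`, so `malloc_input_`/`malloc_output_`, the fp32 cast-back of the output, and the corresponding frees all disappear, and the temporary-buffer helpers only manage `scale_` and `offset_`. Condensed prologue from the hunks above:

```cpp
// New ScaleFp16CPUKernel::Run() prologue: no temporary fp16 copies of input/output.
auto input_tensor = in_tensors_.at(0);
auto output_tensor = out_tensors_.at(0);
input_ = reinterpret_cast<float16_t *>(input_tensor->MutableData());
output_ = reinterpret_cast<float16_t *>(output_tensor->MutableData());
```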
@@ -29,7 +29,6 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Stack;
 namespace mindspore::kernel {
 int StackFp16CPUKernel::Init() {
   if (!InferShapeDone()) {
     return RET_OK;
@@ -27,9 +27,7 @@ class StackFp16CPUKernel : public StackCPUKernel {
                     const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                     const mindspore::lite::PrimitiveC *primitive)
       : StackCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
   ~StackFp16CPUKernel() = default;
   int Init() override;
   int Run() override;