| @@ -30,8 +30,8 @@ typedef struct CropParameter { | |||||
| int64_t offset_[CROP_OFFSET_MAX_SIZE]; | int64_t offset_[CROP_OFFSET_MAX_SIZE]; | ||||
| int64_t in_offset_[CROP_OFFSET_MAX_SIZE]; | int64_t in_offset_[CROP_OFFSET_MAX_SIZE]; | ||||
| int64_t axis_; | int64_t axis_; | ||||
| const int *in_shape_; | |||||
| const int *out_shape_; | |||||
| int *in_shape_; | |||||
| int *out_shape_; | |||||
| int input_dim_; | int input_dim_; | ||||
| } CropParameter; | } CropParameter; | ||||
| @@ -13,6 +13,8 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||||
| #define MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||||
| #include "src/ops/arithmetic.h" | #include "src/ops/arithmetic.h" | ||||
| @@ -21,3 +23,4 @@ namespace lite { | |||||
| ArithmeticParameter *PopulateArithmeticCommonPara(const mindspore::lite::PrimitiveC *primitive); | ArithmeticParameter *PopulateArithmeticCommonPara(const mindspore::lite::PrimitiveC *primitive); | ||||
| } // namespace lite | } // namespace lite | ||||
| } // namespace mindspore | } // namespace mindspore | ||||
| #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||||
| @@ -48,6 +48,7 @@ class Registry { | |||||
| Registry(schema::PrimitiveType primitive_type, ParameterCreator creator) { | Registry(schema::PrimitiveType primitive_type, ParameterCreator creator) { | ||||
| PopulateRegistry::GetInstance()->insertParameterMap(primitive_type, creator); | PopulateRegistry::GetInstance()->insertParameterMap(primitive_type, creator); | ||||
| } | } | ||||
| ~Registry() = default; | |||||
| }; | }; | ||||
| OpParameter *PopulateArithmetic(const mindspore::lite::PrimitiveC *primitive); | OpParameter *PopulateArithmetic(const mindspore::lite::PrimitiveC *primitive); | ||||
| OpParameter *PopulateStridedSliceParameter(const mindspore::lite::PrimitiveC *primitive); | OpParameter *PopulateStridedSliceParameter(const mindspore::lite::PrimitiveC *primitive); | ||||
| @@ -24,7 +24,6 @@ using GetSchemaDef = std::function<std::string()>; | |||||
| class SchemaRegisterImpl { | class SchemaRegisterImpl { | ||||
| public: | public: | ||||
| SchemaRegisterImpl() = default; | |||||
| static SchemaRegisterImpl *Instance() { | static SchemaRegisterImpl *Instance() { | ||||
| static SchemaRegisterImpl instance; | static SchemaRegisterImpl instance; | ||||
| return &instance; | return &instance; | ||||
| @@ -67,12 +67,15 @@ void ConvolutionBaseCPUKernel::FreeQuantParam() { | |||||
| } | } | ||||
| if (conv_quant_arg_->input_quant_args_ != nullptr) { | if (conv_quant_arg_->input_quant_args_ != nullptr) { | ||||
| free(conv_quant_arg_->input_quant_args_); | free(conv_quant_arg_->input_quant_args_); | ||||
| conv_quant_arg_->input_quant_args_ = nullptr; | |||||
| } | } | ||||
| if (conv_quant_arg_->filter_quant_args_ != nullptr) { | if (conv_quant_arg_->filter_quant_args_ != nullptr) { | ||||
| free(conv_quant_arg_->filter_quant_args_); | free(conv_quant_arg_->filter_quant_args_); | ||||
| conv_quant_arg_->filter_quant_args_ = nullptr; | |||||
| } | } | ||||
| if (conv_quant_arg_->output_quant_args_ != nullptr) { | if (conv_quant_arg_->output_quant_args_ != nullptr) { | ||||
| free(conv_quant_arg_->output_quant_args_); | free(conv_quant_arg_->output_quant_args_); | ||||
| conv_quant_arg_->output_quant_args_ = nullptr; | |||||
| } | } | ||||
| } | } | ||||
| @@ -33,7 +33,16 @@ class CropBaseCPUKernel : public LiteKernel { | |||||
| crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_); | crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_); | ||||
| crop_para_->thread_count_ = op_parameter_->thread_num_; | crop_para_->thread_count_ = op_parameter_->thread_num_; | ||||
| } | } | ||||
| ~CropBaseCPUKernel() = default; | |||||
// Destructor: releases the in_shape_ and out_shape_ arrays referenced by crop_para_
// with free(), then resets each pointer to nullptr. Each array is only freed when
// its pointer is non-null.
// NOTE(review): crop_para_ itself is dereferenced without a null check; the visible
// constructor assigns it from op_parameter_ — confirm that is never null here.
// NOTE(review): free() presumes both arrays came from a malloc-family allocation;
// the allocation site is not visible in this view — TODO confirm.
| ~CropBaseCPUKernel() { | |||||
| if (crop_para_->in_shape_ != nullptr) { | |||||
| free(crop_para_->in_shape_); | |||||
// Null the pointer after freeing so it no longer holds the released address.
| crop_para_->in_shape_ = nullptr; | |||||
| } | |||||
| if (crop_para_->out_shape_ != nullptr) { | |||||
| free(crop_para_->out_shape_); | |||||
| crop_para_->out_shape_ = nullptr; | |||||
| } | |||||
| } | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -101,6 +101,10 @@ int ReduceBaseCPUKernel::Init() { | |||||
| if (in_tensors_.size() > 1) { | if (in_tensors_.size() > 1) { | ||||
| auto axes_ptr = in_tensors_.at(1); | auto axes_ptr = in_tensors_.at(1); | ||||
| num_axes_ = axes_ptr->ElementsNum(); | num_axes_ = axes_ptr->ElementsNum(); | ||||
| if (axes_ptr->ElementsNum() > REDUCE_MAX_AXES_NUM) { | |||||
| MS_LOG(ERROR) << "input axes invalid."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| memcpy(axes_, axes_ptr->MutableData(), axes_ptr->Size()); | memcpy(axes_, axes_ptr->MutableData(), axes_ptr->Size()); | ||||
| } else { | } else { | ||||
| num_axes_ = reduce_param->num_axes_; | num_axes_ = reduce_param->num_axes_; | ||||
| @@ -105,10 +105,6 @@ int ConcatFp16CPUKernel::Run() { | |||||
| const auto in_tensor = in_tensors_[i]; | const auto in_tensor = in_tensors_[i]; | ||||
| if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) { | if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) { | ||||
| auto in_tensor_data = reinterpret_cast<float *>(in_tensor->MutableData()); | auto in_tensor_data = reinterpret_cast<float *>(in_tensor->MutableData()); | ||||
| if (in_tensor_data == nullptr) { | |||||
| MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum()); | Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum()); | ||||
| } else { | } else { | ||||
| fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->MutableData()); | fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->MutableData()); | ||||
| @@ -221,6 +221,7 @@ int Convolution1x1FP16CPUKernel::Run() { | |||||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Get executor tensor failed."; | MS_LOG(ERROR) << "Get executor tensor failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -228,6 +229,7 @@ int Convolution1x1FP16CPUKernel::Run() { | |||||
| ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t))); | ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t))); | ||||
| if (pack_input_ == nullptr) { | if (pack_input_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; | MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return RET_MEMORY_FAILED; | return RET_MEMORY_FAILED; | ||||
| } | } | ||||
| @@ -249,6 +251,9 @@ int Convolution1x1FP16CPUKernel::Run() { | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ParallelLaunch failed."; | MS_LOG(ERROR) << "ParallelLaunch failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| ctx_->allocator->Free(pack_input_); | |||||
| pack_input_ = nullptr; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| } | } | ||||
| @@ -256,10 +261,8 @@ int Convolution1x1FP16CPUKernel::Run() { | |||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| if (pack_input_ != nullptr) { | |||||
| ctx_->allocator->Free(pack_input_); | |||||
| pack_input_ = nullptr; | |||||
| } | |||||
| ctx_->allocator->Free(pack_input_); | |||||
| pack_input_ = nullptr; | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -91,9 +91,11 @@ void ConvolutionBaseFP16CPUKernel::IfCastOutput() { | |||||
// Returns the temporary execute buffers to context_->allocator.
// execute_input_ is freed only when in_data_type_ is kNumberTypeFloat32, and
// execute_output_ only when out_data_type_ is kNumberTypeFloat32; each pointer is
// reset to nullptr right after its Free() call.
// NOTE(review): presumably the buffers are kernel-owned conversion copies only in
// the Float32 case; the allocation site is not visible in this view — confirm.
| void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() { | void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() { | ||||
| if (in_data_type_ == kNumberTypeFloat32) { | if (in_data_type_ == kNumberTypeFloat32) { | ||||
| context_->allocator->Free(execute_input_); | context_->allocator->Free(execute_input_); | ||||
// Reset so a later call does not hand the already-released address to Free() again.
// NOTE(review): relies on allocator->Free(nullptr) being harmless — TODO confirm.
| execute_input_ = nullptr; | |||||
| } | } | ||||
| if (out_data_type_ == kNumberTypeFloat32) { | if (out_data_type_ == kNumberTypeFloat32) { | ||||
| context_->allocator->Free(execute_output_); | context_->allocator->Free(execute_output_); | ||||
| execute_output_ = nullptr; | |||||
| } | } | ||||
| } | } | ||||
| @@ -123,12 +123,11 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -35,7 +35,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { | |||||
| sliding_ = nullptr; | sliding_ = nullptr; | ||||
| } | } | ||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | |||||
| free(packed_weight_); | |||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| } | } | ||||
| @@ -143,12 +143,17 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||||
| auto ret = InitBuffer(); | auto ret = InitBuffer(); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | ||||
| return RET_ERROR; | |||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| return ret; | |||||
| } | } | ||||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | MS_LOG(ERROR) << "Get Execute tensor failed."; | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| @@ -164,7 +169,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | ||||
| @@ -176,5 +180,4 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -154,25 +154,26 @@ int ConvolutionFP16CPUKernel::Run() { | |||||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | MS_LOG(ERROR) << "Get Execute tensor failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | MS_LOG(ERROR) << "Init tmp buffer failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| FreeTmpBuffer(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; | |||||
| FreeTmpBuffer(); | |||||
| return RET_ERROR; | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]"; | |||||
| } | } | ||||
| FreeTmpBuffer(); | |||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| return RET_OK; | |||||
| FreeTmpBuffer(); | |||||
| return ret; | |||||
| } | } | ||||
| ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) { | ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) { | ||||
| @@ -354,7 +355,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor | |||||
| MS_LOG(ERROR) << "Get new conv parameter failed."; | MS_LOG(ERROR) << "Get new conv parameter failed."; | ||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| // create new input for each group | // create new input for each group | ||||
| auto in_tensor = CreateInputTensor(inputs.front()->data_type(), in_shape, infered_flag); | auto in_tensor = CreateInputTensor(inputs.front()->data_type(), in_shape, infered_flag); | ||||
| if (in_tensor == nullptr) { | if (in_tensor == nullptr) { | ||||
| @@ -218,26 +218,26 @@ int ConvolutionWinogradFP16CPUKernel::Run() { | |||||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | MS_LOG(ERROR) << "Get Execute tensor failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | MS_LOG(ERROR) << "Init tmp buffer failed."; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; | |||||
| FreeTmpBuffer(); | |||||
| return RET_ERROR; | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | |||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| FreeTmpBuffer(); | FreeTmpBuffer(); | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -62,15 +62,10 @@ static int CropFp16Run(void *cdata, int task_id) { | |||||
| int CropFp16CPUKernel::Run() { | int CropFp16CPUKernel::Run() { | ||||
| input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_); | input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_); | ||||
| if (input_ptr_ == nullptr) { | |||||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_); | output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_); | ||||
| if (output_ptr_ == nullptr) { | |||||
| FreeInputAndOutput(); | |||||
| if (input_ptr_ == nullptr || output_ptr_ == nullptr) { | |||||
| MS_LOG(ERROR) << "input or output is nullptr"; | MS_LOG(ERROR) << "input or output is nullptr"; | ||||
| FreeInputAndOutput(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -78,16 +73,12 @@ int CropFp16CPUKernel::Run() { | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | ||||
| FreeInputAndOutput(); | FreeInputAndOutput(); | ||||
| return ret; | |||||
| } | } | ||||
| if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) { | if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) { | ||||
| Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()), | Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()), | ||||
| out_tensors_.at(kOutputIndex)->ElementsNum()); | out_tensors_.at(kOutputIndex)->ElementsNum()); | ||||
| } | } | ||||
| FreeInputAndOutput(); | FreeInputAndOutput(); | ||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Crop error error_code[" << ret << "]"; | |||||
| } | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -35,7 +35,7 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { | |||||
| sliding_ = nullptr; | sliding_ = nullptr; | ||||
| } | } | ||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | |||||
| free(packed_weight_); | |||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| } | } | ||||
| } | } | ||||
| @@ -159,12 +159,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| auto ret = InitBuffer(); | auto ret = InitBuffer(); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | MS_LOG(ERROR) << "Get Execute tensor failed."; | ||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| @@ -181,7 +186,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| @@ -192,7 +196,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -184,6 +184,8 @@ int DeConvolutionFp16CPUKernel::Run() { | |||||
| int error_code = InitRunBuf(); | int error_code = InitRunBuf(); | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||||
| FreeRunBuf(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -196,15 +198,13 @@ int DeConvolutionFp16CPUKernel::Run() { | |||||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); | error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| } | } | ||||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ConvolutionBaseFP16CPUKernel::IfCastOutput(); | ||||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | ||||
| FreeRunBuf(); | FreeRunBuf(); | ||||
| return RET_OK; | |||||
| return error_code; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -218,6 +218,7 @@ int Convolution3x3Int8CPUKernel::Run() { | |||||
| auto ret = InitTmpBuffer(); | auto ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | MS_LOG(ERROR) << "Init tmp buffer failed."; | ||||
| FreeTmpBuffer(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->MutableData()); | auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->MutableData()); | ||||
| @@ -61,6 +61,7 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { | |||||
| packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | ||||
| if (packed_weight_ == nullptr) { | if (packed_weight_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| free(tmp_weight); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; | bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; | ||||
| @@ -55,6 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||||
| packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | ||||
| if (packed_weight_ == nullptr) { | if (packed_weight_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| free(tmp_weight); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -143,6 +144,8 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| auto ret = InitBuffer(); | auto ret = InitBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | ||||
| context_->allocator->Free(row_buffer_); | |||||
| row_buffer_ = nullptr; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -155,11 +158,10 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwInt8Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, ConvDwInt8Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| context_->allocator->Free(row_buffer_); | context_->allocator->Free(row_buffer_); | ||||
| return RET_OK; | |||||
| row_buffer_ = nullptr; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -87,7 +87,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() { | |||||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * | int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * | ||||
| UP_DIV(conv_param_->output_channel_, C8NUM); | UP_DIV(conv_param_->output_channel_, C8NUM); | ||||
| packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); | packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); | ||||
| if (packed_input_ == nullptr) { | |||||
| if (packed_output_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -322,6 +322,12 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||||
| auto ret = InitBuffer(); | auto ret = InitBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | ||||
| if (need_align_) { | |||||
| context_->allocator->Free(packed_input_); | |||||
| context_->allocator->Free(packed_output_); | |||||
| packed_input_ = nullptr; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -342,7 +348,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWInt8Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWInt8Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvDwSWInt8Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "ConvDwSWInt8Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| @@ -350,8 +355,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_input_); | context_->allocator->Free(packed_input_); | ||||
| context_->allocator->Free(packed_output_); | context_->allocator->Free(packed_output_); | ||||
| packed_input_ = nullptr; | |||||
| packed_output_ = nullptr; | |||||
| } | } | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -117,10 +117,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | MS_LOG(ERROR) << "Malloc buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| if (packed_input_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -177,6 +173,13 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| auto ret = InitBuffer(); | auto ret = InitBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!"; | MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!"; | ||||
| context_->allocator->Free(packed_input_); | |||||
| packed_input_ = nullptr; | |||||
| context_->allocator->Free(output_buffer_); | |||||
| output_buffer_ = nullptr; | |||||
| if (need_align_) { | |||||
| context_->allocator->Free(packed_output_); | |||||
| } | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| @@ -194,17 +197,19 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { | |||||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_); | ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| if (need_align_) { | if (need_align_) { | ||||
| PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | ||||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | ||||
| context_->allocator->Free(packed_output_); | context_->allocator->Free(packed_output_); | ||||
| packed_output_ = nullptr; | |||||
| } | } | ||||
| context_->allocator->Free(packed_input_); | context_->allocator->Free(packed_input_); | ||||
| packed_input_ = nullptr; | |||||
| context_->allocator->Free(output_buffer_); | context_->allocator->Free(output_buffer_); | ||||
| return RET_OK; | |||||
| output_buffer_ = nullptr; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuDeconvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeconvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -256,6 +256,7 @@ int DeConvInt8CPUKernel::Run() { | |||||
| int error_code = InitRunBuf(); | int error_code = InitRunBuf(); | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]"; | ||||
| FreeRunBuf(); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| @@ -270,12 +271,10 @@ int DeConvInt8CPUKernel::Run() { | |||||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvInt8Run, this, thread_count_); | error_code = ParallelLaunch(this->context_->thread_pool_, DeConvInt8Run, this, thread_count_); | ||||
| if (error_code != RET_OK) { | if (error_code != RET_OK) { | ||||
| MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; | MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| } | } | ||||
| FreeRunBuf(); | FreeRunBuf(); | ||||
| return RET_OK; | |||||
| return error_code; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuDeConvInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | kernel::LiteKernel *CpuDeConvInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | ||||
| @@ -110,6 +110,8 @@ int DivInt8CPUKernel::Run() { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | MS_LOG(ERROR) << "Memory allocation failed"; | ||||
| context_->allocator->Free(tile0_data_); | context_->allocator->Free(tile0_data_); | ||||
| context_->allocator->Free(tile1_data_); | context_->allocator->Free(tile1_data_); | ||||
| tile0_data_ = nullptr; | |||||
| tile1_data_ = nullptr; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), | TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), | ||||
| @@ -120,6 +122,8 @@ int DivInt8CPUKernel::Run() { | |||||
| if (broadcast_) { | if (broadcast_) { | ||||
| context_->allocator->Free(tile0_data_); | context_->allocator->Free(tile0_data_); | ||||
| context_->allocator->Free(tile1_data_); | context_->allocator->Free(tile1_data_); | ||||
| tile0_data_ = nullptr; | |||||
| tile1_data_ = nullptr; | |||||
| } | } | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; | MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; | ||||
| @@ -52,25 +52,32 @@ int FullconnectionInt8CPUKernel::ReSize() { | |||||
| d16_ = UP_ROUND(fc_param_->deep_, 16); | d16_ = UP_ROUND(fc_param_->deep_, 16); | ||||
| thread_count_ = MSMIN(thread_count_, UP_DIV(c4_, 4)); | thread_count_ = MSMIN(thread_count_, UP_DIV(c4_, 4)); | ||||
| thread_stride_ = UP_DIV(UP_DIV(c4_, 4), thread_count_); | thread_stride_ = UP_DIV(UP_DIV(c4_, 4), thread_count_); | ||||
| a_r4x16_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(r4_ * d16_ * sizeof(int8_t))); | a_r4x16_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(r4_ * d16_ * sizeof(int8_t))); | ||||
| if (!a_r4x16_ptr_) return RET_MEMORY_FAILED; | |||||
| memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t)); | |||||
| b_c16x4_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(c4_ * d16_ * sizeof(int8_t))); | b_c16x4_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(c4_ * d16_ * sizeof(int8_t))); | ||||
| if (!b_c16x4_ptr_) return RET_MEMORY_FAILED; | |||||
| memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t)); | |||||
| input_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(r4_ * sizeof(int))); | input_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(r4_ * sizeof(int))); | ||||
| if (!input_sums_) return RET_MEMORY_FAILED; | |||||
| memset(input_sums_, 0, r4_ * sizeof(int)); | |||||
| weight_bias_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(c4_ * sizeof(int))); | weight_bias_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(c4_ * sizeof(int))); | ||||
| if (!weight_bias_sums_) return RET_MEMORY_FAILED; | |||||
| if (a_r4x16_ptr_ == nullptr || b_c16x4_ptr_ == nullptr || input_sums_ == nullptr || weight_bias_sums_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| FreeTmpBuffer(); | |||||
| return RET_MEMORY_FAILED; | |||||
| } | |||||
| memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t)); | |||||
| memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t)); | |||||
| memset(input_sums_, 0, r4_ * sizeof(int)); | |||||
| memset(weight_bias_sums_, 0, c4_ * sizeof(int)); | memset(weight_bias_sums_, 0, c4_ * sizeof(int)); | ||||
| if (in_tensors_.size() == 3) { | if (in_tensors_.size() == 3) { | ||||
| auto bias_len = fc_param_->col_8_ * sizeof(int); | auto bias_len = fc_param_->col_8_ * sizeof(int); | ||||
| bias_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(bias_len)); | bias_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(bias_len)); | ||||
| if (!bias_ptr_) return RET_MEMORY_FAILED; | |||||
| if (bias_ptr_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| FreeTmpBuffer(); | |||||
| return RET_MEMORY_FAILED; | |||||
| } | |||||
| memcpy(bias_ptr_, in_tensors_[2]->data_c(), bias_len); | memcpy(bias_ptr_, in_tensors_[2]->data_c(), bias_len); | ||||
| } else { | } else { | ||||
| bias_ptr_ = NULL; | |||||
| bias_ptr_ = nullptr; | |||||
| } | } | ||||
| auto input_tensor = in_tensors_[0]; | auto input_tensor = in_tensors_[0]; | ||||
| @@ -83,15 +83,9 @@ int GatherInt8CPUKernel::DoGather(int task_id) { | |||||
| int count = MSMIN(stride, outer_size - stride * task_id); | int count = MSMIN(stride, outer_size - stride * task_id); | ||||
| auto thread_stride = stride * task_id; | auto thread_stride = stride * task_id; | ||||
| int error_code; | |||||
| input_ptr += thread_stride * limit; | input_ptr += thread_stride * limit; | ||||
| output_ptr += thread_stride * indices_element_size; | output_ptr += thread_stride * indices_element_size; | ||||
| error_code = GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_); | |||||
| if (error_code != RET_OK) { | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| return GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_); | |||||
| } | } | ||||
| int GatherInt8Run(void *cdata, int task_id) { | int GatherInt8Run(void *cdata, int task_id) { | ||||
| @@ -13,6 +13,8 @@ | |||||
| * See the License for the specific language governing permissions and | * See the License for the specific language governing permissions and | ||||
| * limitations under the License. | * limitations under the License. | ||||
| */ | */ | ||||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||||
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <stdbool.h> | #include <stdbool.h> | ||||
| @@ -42,3 +44,4 @@ void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||||