| @@ -30,8 +30,8 @@ typedef struct CropParameter { | |||
| int64_t offset_[CROP_OFFSET_MAX_SIZE]; | |||
| int64_t in_offset_[CROP_OFFSET_MAX_SIZE]; | |||
| int64_t axis_; | |||
| const int *in_shape_; | |||
| const int *out_shape_; | |||
| int *in_shape_; | |||
| int *out_shape_; | |||
| int input_dim_; | |||
| } CropParameter; | |||
| @@ -13,6 +13,8 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||
| #define MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||
| #include "src/ops/arithmetic.h" | |||
| @@ -21,3 +23,4 @@ namespace lite { | |||
| ArithmeticParameter *PopulateArithmeticCommonPara(const mindspore::lite::PrimitiveC *primitive); | |||
| } // namespace lite | |||
| } // namespace mindspore | |||
| #endif // MINDSPORE_LITE_SRC_OPS_POPULATE_ARITHMETIC_POPULATE_H_ | |||
| @@ -48,6 +48,7 @@ class Registry { | |||
// Constructor: registers `creator` for `primitive_type` by forwarding both arguments to
// insertParameterMap() on the object returned by PopulateRegistry::GetInstance().
// The constructor stores nothing in the Registry object itself.
| Registry(schema::PrimitiveType primitive_type, ParameterCreator creator) { | |||
| PopulateRegistry::GetInstance()->insertParameterMap(primitive_type, creator); | |||
| } | |||
| ~Registry() = default; | |||
| }; | |||
| OpParameter *PopulateArithmetic(const mindspore::lite::PrimitiveC *primitive); | |||
| OpParameter *PopulateStridedSliceParameter(const mindspore::lite::PrimitiveC *primitive); | |||
| @@ -24,7 +24,6 @@ using GetSchemaDef = std::function<std::string()>; | |||
| class SchemaRegisterImpl { | |||
| public: | |||
| SchemaRegisterImpl() = default; | |||
| static SchemaRegisterImpl *Instance() { | |||
| static SchemaRegisterImpl instance; | |||
| return &instance; | |||
| @@ -67,12 +67,15 @@ void ConvolutionBaseCPUKernel::FreeQuantParam() { | |||
| } | |||
| if (conv_quant_arg_->input_quant_args_ != nullptr) { | |||
| free(conv_quant_arg_->input_quant_args_); | |||
| conv_quant_arg_->input_quant_args_ = nullptr; | |||
| } | |||
| if (conv_quant_arg_->filter_quant_args_ != nullptr) { | |||
| free(conv_quant_arg_->filter_quant_args_); | |||
| conv_quant_arg_->filter_quant_args_ = nullptr; | |||
| } | |||
| if (conv_quant_arg_->output_quant_args_ != nullptr) { | |||
| free(conv_quant_arg_->output_quant_args_); | |||
| conv_quant_arg_->output_quant_args_ = nullptr; | |||
| } | |||
| } | |||
| @@ -33,7 +33,16 @@ class CropBaseCPUKernel : public LiteKernel { | |||
| crop_para_ = reinterpret_cast<CropParameter *>(op_parameter_); | |||
| crop_para_->thread_count_ = op_parameter_->thread_num_; | |||
| } | |||
| ~CropBaseCPUKernel() = default; | |||
// Destructor: releases the two shape arrays referenced by crop_para_ and clears the pointers.
// NOTE(review): free() is used here, so in_shape_/out_shape_ are presumably malloc-allocated
// elsewhere in this kernel (allocation site not visible in this view) — confirm.
// NOTE(review): crop_para_ is dereferenced without a null check — confirm it is always set.
| ~CropBaseCPUKernel() { | |||
// Release the input shape array only when one is attached.
| if (crop_para_->in_shape_ != nullptr) { | |||
| free(crop_para_->in_shape_); | |||
| crop_para_->in_shape_ = nullptr; | |||
| } | |||
// Same handling for the output shape array.
| if (crop_para_->out_shape_ != nullptr) { | |||
| free(crop_para_->out_shape_); | |||
| crop_para_->out_shape_ = nullptr; | |||
| } | |||
| } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -101,6 +101,10 @@ int ReduceBaseCPUKernel::Init() { | |||
| if (in_tensors_.size() > 1) { | |||
| auto axes_ptr = in_tensors_.at(1); | |||
| num_axes_ = axes_ptr->ElementsNum(); | |||
| if (axes_ptr->ElementsNum() > REDUCE_MAX_AXES_NUM) { | |||
| MS_LOG(ERROR) << "input axes invalid."; | |||
| return RET_ERROR; | |||
| } | |||
| memcpy(axes_, axes_ptr->MutableData(), axes_ptr->Size()); | |||
| } else { | |||
| num_axes_ = reduce_param->num_axes_; | |||
| @@ -105,10 +105,6 @@ int ConcatFp16CPUKernel::Run() { | |||
| const auto in_tensor = in_tensors_[i]; | |||
| if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) { | |||
| auto in_tensor_data = reinterpret_cast<float *>(in_tensor->MutableData()); | |||
| if (in_tensor_data == nullptr) { | |||
| MS_LOG(ERROR) << "got nullptr when cast in_tensor to float ptr"; | |||
| return RET_ERROR; | |||
| } | |||
| Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum()); | |||
| } else { | |||
| fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->MutableData()); | |||
| @@ -221,6 +221,7 @@ int Convolution1x1FP16CPUKernel::Run() { | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get executor tensor failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| @@ -228,6 +229,7 @@ int Convolution1x1FP16CPUKernel::Run() { | |||
| ctx_->allocator->Malloc(matmul_param_->row_16_ * matmul_param_->deep_ * sizeof(float16_t))); | |||
| if (pack_input_ == nullptr) { | |||
| MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| @@ -249,6 +251,9 @@ int Convolution1x1FP16CPUKernel::Run() { | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ParallelLaunch failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| ctx_->allocator->Free(pack_input_); | |||
| pack_input_ = nullptr; | |||
| return ret; | |||
| } | |||
| } | |||
| @@ -256,10 +261,8 @@ int Convolution1x1FP16CPUKernel::Run() { | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| if (pack_input_ != nullptr) { | |||
| ctx_->allocator->Free(pack_input_); | |||
| pack_input_ = nullptr; | |||
| } | |||
| ctx_->allocator->Free(pack_input_); | |||
| pack_input_ = nullptr; | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -91,9 +91,11 @@ void ConvolutionBaseFP16CPUKernel::IfCastOutput() { | |||
// Returns the temporary execute buffers to context_->allocator and clears the pointers.
// Each buffer is released only when the corresponding data type is kNumberTypeFloat32.
// NOTE(review): presumably that is the only case in which the buffer was obtained from the
// allocator rather than borrowed from the tensor — confirm against GetExecuteTensor().
| void ConvolutionBaseFP16CPUKernel::FreeTmpBuffer() { | |||
| if (in_data_type_ == kNumberTypeFloat32) { | |||
| context_->allocator->Free(execute_input_); | |||
// Clear the pointer so a later call cannot pass the stale address to Free() again.
| execute_input_ = nullptr; | |||
| } | |||
| if (out_data_type_ == kNumberTypeFloat32) { | |||
| context_->allocator->Free(execute_output_); | |||
// Same reset for the output side.
| execute_output_ = nullptr; | |||
| } | |||
| } | |||
| @@ -123,12 +123,11 @@ int ConvolutionDepthwiseFp16CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwFp16Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwFp16Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return RET_OK; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernel *CpuConvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -35,7 +35,7 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| } | |||
| @@ -143,12 +143,17 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| return ret; | |||
| } | |||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| if (need_align_) { | |||
| @@ -164,7 +169,6 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWFp16Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwSWFp16Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| if (need_align_) { | |||
| PackNHWC8ToNHWCFp16(packed_output_, execute_output_, conv_param_->output_batch_, | |||
| @@ -176,5 +180,4 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return RET_OK; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -154,25 +154,26 @@ int ConvolutionFP16CPUKernel::Run() { | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "conv fp16 error error_code[" << error_code << "]"; | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionFp16Impl, this, thread_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "conv fp16 error ret[" << ret << "]"; | |||
| } | |||
| FreeTmpBuffer(); | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return RET_OK; | |||
| FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| ConvParameter *CreateNewConvParameterFp16(ConvParameter *parameter) { | |||
| @@ -354,7 +355,6 @@ kernel::LiteKernel *CpuGroupConvFp16KernelCreator(const std::vector<lite::Tensor | |||
| MS_LOG(ERROR) << "Get new conv parameter failed."; | |||
| return nullptr; | |||
| } | |||
| // create new input for each group | |||
| auto in_tensor = CreateInputTensor(inputs.front()->data_type(), in_shape, infered_flag); | |||
| if (in_tensor == nullptr) { | |||
| @@ -218,26 +218,26 @@ int ConvolutionWinogradFP16CPUKernel::Run() { | |||
| auto ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| } | |||
| int error_code = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "conv winograd error error_code[" << error_code << "]"; | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvolutionWinogradFp16Impl, this, thread_count_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "conv winograd error error_code[" << ret << "]"; | |||
| } | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| FreeTmpBuffer(); | |||
| return RET_OK; | |||
| return ret; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -62,15 +62,10 @@ static int CropFp16Run(void *cdata, int task_id) { | |||
| int CropFp16CPUKernel::Run() { | |||
| input_ptr_ = ConvertInputFp32toFp16(in_tensors_.at(kInputIndex), context_); | |||
| if (input_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||
| return RET_ERROR; | |||
| } | |||
| output_ptr_ = MallocOutputFp16(out_tensors_.at(kOutputIndex), context_); | |||
| if (output_ptr_ == nullptr) { | |||
| FreeInputAndOutput(); | |||
| if (input_ptr_ == nullptr || output_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "input or output is nullptr"; | |||
| FreeInputAndOutput(); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -78,16 +73,12 @@ int CropFp16CPUKernel::Run() { | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ParallelLaunch failed: " << ret; | |||
| FreeInputAndOutput(); | |||
| return ret; | |||
| } | |||
| if (out_tensors_.at(kOutputIndex)->data_type() == kNumberTypeFloat32) { | |||
| Float16ToFloat32(output_ptr_, reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()), | |||
| out_tensors_.at(kOutputIndex)->ElementsNum()); | |||
| } | |||
| FreeInputAndOutput(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Crop error error_code[" << ret << "]"; | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -35,7 +35,7 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| } | |||
| @@ -159,12 +159,17 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitBuffer failed."; | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| return RET_ERROR; | |||
| } | |||
| ret = ConvolutionBaseFP16CPUKernel::GetExecuteTensor(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Get Execute tensor failed."; | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return ret; | |||
| } | |||
| if (need_align_) { | |||
| @@ -181,7 +186,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwFp16Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DeconvDwFp16Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| if (need_align_) { | |||
| @@ -192,7 +196,7 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { | |||
| } | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| return RET_OK; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernel *CpuDeconvDwFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -184,6 +184,8 @@ int DeConvolutionFp16CPUKernel::Run() { | |||
| int error_code = InitRunBuf(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv fp32 InitRunBuf error! error_code[" << error_code << "]"; | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| FreeRunBuf(); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -196,15 +198,13 @@ int DeConvolutionFp16CPUKernel::Run() { | |||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvFp16Run, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv fp32 run error! error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| ConvolutionBaseFP16CPUKernel::IfCastOutput(); | |||
| ConvolutionBaseFP16CPUKernel::FreeTmpBuffer(); | |||
| FreeRunBuf(); | |||
| return RET_OK; | |||
| return error_code; | |||
| } | |||
| kernel::LiteKernel *CpuDeConvFp16KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -218,6 +218,7 @@ int Convolution3x3Int8CPUKernel::Run() { | |||
| auto ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| FreeTmpBuffer(); | |||
| return RET_ERROR; | |||
| } | |||
| auto input_addr = reinterpret_cast<int8_t *>(in_tensors_.at(kInputIndex)->MutableData()); | |||
| @@ -61,6 +61,7 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { | |||
| packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | |||
| if (packed_weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| free(tmp_weight); | |||
| return RET_ERROR; | |||
| } | |||
| bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; | |||
| @@ -55,6 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||
| packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t))); | |||
| if (packed_weight_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| free(tmp_weight); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -143,6 +144,8 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { | |||
| auto ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||
| context_->allocator->Free(row_buffer_); | |||
| row_buffer_ = nullptr; | |||
| return ret; | |||
| } | |||
| @@ -155,11 +158,10 @@ int ConvolutionDepthwiseInt8CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwInt8Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwInt8Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| context_->allocator->Free(row_buffer_); | |||
| return RET_OK; | |||
| row_buffer_ = nullptr; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernel *CpuConvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -87,7 +87,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitBuffer() { | |||
| int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * | |||
| UP_DIV(conv_param_->output_channel_, C8NUM); | |||
| packed_output_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); | |||
| if (packed_input_ == nullptr) { | |||
| if (packed_output_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| @@ -322,6 +322,12 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||
| auto ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||
| if (need_align_) { | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| packed_input_ = nullptr; | |||
| packed_output_ = nullptr; | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -342,7 +348,6 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, ConvDwSWInt8Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvDwSWInt8Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| if (need_align_) { | |||
| @@ -350,8 +355,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::Run() { | |||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | |||
| context_->allocator->Free(packed_input_); | |||
| context_->allocator->Free(packed_output_); | |||
| packed_input_ = nullptr; | |||
| packed_output_ = nullptr; | |||
| } | |||
| return RET_OK; | |||
| return ret; | |||
| } | |||
| } // namespace mindspore::kernel | |||
| @@ -117,10 +117,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| if (packed_input_ == nullptr) { | |||
| MS_LOG(ERROR) << "Malloc buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -177,6 +173,13 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { | |||
| auto ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!"; | |||
| context_->allocator->Free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| context_->allocator->Free(output_buffer_); | |||
| output_buffer_ = nullptr; | |||
| if (need_align_) { | |||
| context_->allocator->Free(packed_output_); | |||
| } | |||
| return ret; | |||
| } | |||
| @@ -194,17 +197,19 @@ int DeconvolutionDepthwiseInt8CPUKernel::Run() { | |||
| ret = ParallelLaunch(this->context_->thread_pool_, DeconvDwInt8Run, this, conv_param_->thread_num_); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DeconvDwInt8Run error: error_code[" << ret << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| if (need_align_) { | |||
| PackNHWC4ToNHWCInt8(packed_output_, output_addr, conv_param_->output_batch_, | |||
| conv_param_->output_h_ * conv_param_->output_w_, conv_param_->output_channel_); | |||
| context_->allocator->Free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| context_->allocator->Free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| context_->allocator->Free(output_buffer_); | |||
| return RET_OK; | |||
| output_buffer_ = nullptr; | |||
| return ret; | |||
| } | |||
| kernel::LiteKernel *CpuDeconvDwInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -256,6 +256,7 @@ int DeConvInt8CPUKernel::Run() { | |||
| int error_code = InitRunBuf(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitRunBuf error! error_code[" << error_code << "]"; | |||
| FreeRunBuf(); | |||
| return RET_ERROR; | |||
| } | |||
| @@ -270,12 +271,10 @@ int DeConvInt8CPUKernel::Run() { | |||
| error_code = ParallelLaunch(this->context_->thread_pool_, DeConvInt8Run, this, thread_count_); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 run error! error_code[" << error_code << "]"; | |||
| return RET_ERROR; | |||
| } | |||
| } | |||
| FreeRunBuf(); | |||
| return RET_OK; | |||
| return error_code; | |||
| } | |||
| kernel::LiteKernel *CpuDeConvInt8KernelCreator(const std::vector<lite::Tensor *> &inputs, | |||
| @@ -110,6 +110,8 @@ int DivInt8CPUKernel::Run() { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| context_->allocator->Free(tile0_data_); | |||
| context_->allocator->Free(tile1_data_); | |||
| tile0_data_ = nullptr; | |||
| tile1_data_ = nullptr; | |||
| return RET_ERROR; | |||
| } | |||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->MutableData()), | |||
| @@ -120,6 +122,8 @@ int DivInt8CPUKernel::Run() { | |||
| if (broadcast_) { | |||
| context_->allocator->Free(tile0_data_); | |||
| context_->allocator->Free(tile1_data_); | |||
| tile0_data_ = nullptr; | |||
| tile1_data_ = nullptr; | |||
| } | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; | |||
| @@ -52,25 +52,32 @@ int FullconnectionInt8CPUKernel::ReSize() { | |||
| d16_ = UP_ROUND(fc_param_->deep_, 16); | |||
| thread_count_ = MSMIN(thread_count_, UP_DIV(c4_, 4)); | |||
| thread_stride_ = UP_DIV(UP_DIV(c4_, 4), thread_count_); | |||
| a_r4x16_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(r4_ * d16_ * sizeof(int8_t))); | |||
| if (!a_r4x16_ptr_) return RET_MEMORY_FAILED; | |||
| memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t)); | |||
| b_c16x4_ptr_ = reinterpret_cast<int8_t *>(ctx_->allocator->Malloc(c4_ * d16_ * sizeof(int8_t))); | |||
| if (!b_c16x4_ptr_) return RET_MEMORY_FAILED; | |||
| memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t)); | |||
| input_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(r4_ * sizeof(int))); | |||
| if (!input_sums_) return RET_MEMORY_FAILED; | |||
| memset(input_sums_, 0, r4_ * sizeof(int)); | |||
| weight_bias_sums_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(c4_ * sizeof(int))); | |||
| if (!weight_bias_sums_) return RET_MEMORY_FAILED; | |||
| if (a_r4x16_ptr_ == nullptr || b_c16x4_ptr_ == nullptr || input_sums_ == nullptr || weight_bias_sums_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| FreeTmpBuffer(); | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memset(a_r4x16_ptr_, 0, r4_ * d16_ * sizeof(int8_t)); | |||
| memset(b_c16x4_ptr_, 0, c4_ * d16_ * sizeof(int8_t)); | |||
| memset(input_sums_, 0, r4_ * sizeof(int)); | |||
| memset(weight_bias_sums_, 0, c4_ * sizeof(int)); | |||
| if (in_tensors_.size() == 3) { | |||
| auto bias_len = fc_param_->col_8_ * sizeof(int); | |||
| bias_ptr_ = reinterpret_cast<int *>(ctx_->allocator->Malloc(bias_len)); | |||
| if (!bias_ptr_) return RET_MEMORY_FAILED; | |||
| if (bias_ptr_ == nullptr) { | |||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||
| FreeTmpBuffer(); | |||
| return RET_MEMORY_FAILED; | |||
| } | |||
| memcpy(bias_ptr_, in_tensors_[2]->data_c(), bias_len); | |||
| } else { | |||
| bias_ptr_ = NULL; | |||
| bias_ptr_ = nullptr; | |||
| } | |||
| auto input_tensor = in_tensors_[0]; | |||
| @@ -83,15 +83,9 @@ int GatherInt8CPUKernel::DoGather(int task_id) { | |||
| int count = MSMIN(stride, outer_size - stride * task_id); | |||
| auto thread_stride = stride * task_id; | |||
| int error_code; | |||
| input_ptr += thread_stride * limit; | |||
| output_ptr += thread_stride * indices_element_size; | |||
| error_code = GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_); | |||
| if (error_code != RET_OK) { | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| return GatherInt8(input_ptr, output_ptr, count, inner_size, limit, indices_ptr, indices_element_size, param_); | |||
| } | |||
| int GatherInt8Run(void *cdata, int task_id) { | |||
| @@ -13,6 +13,8 @@ | |||
| * See the License for the specific language governing permissions and | |||
| * limitations under the License. | |||
| */ | |||
| #ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||
| #define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||
| #include <stdlib.h> | |||
| #include <stdbool.h> | |||
| @@ -42,3 +44,4 @@ void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_OPT_OP_HANDLER_H_ | |||