| @@ -97,6 +97,13 @@ class MS_API LiteSession { | |||
| /// | |||
| /// \return The vector of MindSpore Lite MSTensor. | |||
| virtual std::vector<tensor::MSTensor *> GetOutputsByName(const std::string &node_name) const = 0; | |||
| /// \brief Resize inputs shape. | |||
| /// | |||
| /// \param[in] inputs Define the new inputs shape. | |||
| /// | |||
| /// \return STATUS as an error code of resize inputs, STATUS is defined in errorcode.h. | |||
| virtual int Resize(const std::vector<tensor::MSTensor *> &inputs) = 0; | |||
| }; | |||
| } // namespace session | |||
| } // namespace mindspore | |||
| @@ -57,22 +57,27 @@ struct KernelKey { | |||
| class LiteKernel { | |||
| public: | |||
| LiteKernel() = default; | |||
| explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &in_tensors, | |||
| const std::vector<lite::tensor::Tensor *> &out_tensors, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &in_tensors, | |||
| const std::vector<lite::tensor::Tensor *> &out_tensors, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : op_parameter_(parameter), | |||
| in_tensors_(in_tensors), | |||
| out_tensors_(out_tensors), | |||
| primitive_(primitive), | |||
| context_(ctx) { | |||
| if (op_parameter_ && ctx) { | |||
| if (op_parameter_ != nullptr && ctx != nullptr) { | |||
| op_parameter_->thread_num_ = ctx->thread_num_; | |||
| } | |||
| this->in_kernels_.clear(); | |||
| this->out_kernels_.clear(); | |||
| } | |||
| virtual ~LiteKernel() { delete op_parameter_; } | |||
| virtual ~LiteKernel() { | |||
| if (op_parameter_ != nullptr) { | |||
| delete op_parameter_; | |||
| op_parameter_ = nullptr; | |||
| } | |||
| } | |||
| virtual int Prepare() { | |||
| if (!InferShapeDone()) { | |||
| @@ -149,6 +154,8 @@ class LiteKernel { | |||
| void set_need_reinit() { need_reinit_ = true; } | |||
| const lite::Primitive *GetPrimitive() const { return primitive_; } | |||
| protected: | |||
| bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()) && true; } | |||
| @@ -315,6 +315,41 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(const s | |||
| } | |||
| return ret->second; | |||
| } | |||
| int LiteSession::ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs) { | |||
| if (inputs.size() != inputs_.size()) { | |||
| MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size(); | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| for (size_t i = 0; i < inputs.size(); ++i) { | |||
| if (inputs[i] == nullptr) { | |||
| MS_LOG(ERROR) << "Input tensor is nullptr!"; | |||
| return RET_PARAM_INVALID; | |||
| } | |||
| inputs_[i]->set_shape(inputs[i]->shape()); | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs) { | |||
| inputs_old_.clear(); | |||
| inputs_old_ = inputs_; | |||
| auto ret = ResizeInputs(inputs); | |||
| if (ret != RET_OK) { | |||
| inputs_ = inputs_old_; | |||
| return ret; | |||
| } | |||
| Scheduler scheduler(context_); | |||
| ret = scheduler.ReSizeKernels(kernels_); | |||
| if (ret != RET_OK) { | |||
| inputs_ = inputs_old_; | |||
| scheduler.ReSizeKernels(kernels_); | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| } // namespace lite | |||
| session::LiteSession *session::LiteSession::CreateSession(lite::Context *context) { | |||
| @@ -327,4 +362,5 @@ session::LiteSession *session::LiteSession::CreateSession(lite::Context *context | |||
| } | |||
| return session; | |||
| } | |||
| } // namespace mindspore | |||
| @@ -54,6 +54,8 @@ class LiteSession : public session::LiteSession { | |||
| std::vector<mindspore::tensor::MSTensor *> GetOutputsByName(const std::string &name) const override; | |||
| int Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs) override; | |||
| protected: | |||
| int ConvertTensors(const lite::Model *model); | |||
| @@ -68,6 +70,8 @@ class LiteSession : public session::LiteSession { | |||
| void InitGraphInputMap(const lite::Model *model); | |||
| // init this->output_map_ | |||
| void InitGraphOutputMap(const lite::Model *model); | |||
| // resize inputs | |||
| int ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs); | |||
| protected: | |||
| Context *context_ = nullptr; | |||
| @@ -75,6 +79,7 @@ class LiteSession : public session::LiteSession { | |||
| std::vector<tensor::Tensor *> tensors_; | |||
| // graph input tensors | |||
| std::vector<tensor::Tensor *> inputs_; | |||
| std::vector<tensor::Tensor *> inputs_old_; | |||
| // graph output tensors | |||
| std::vector<tensor::Tensor *> outputs_; | |||
| // graph input MSTensors | |||
| @@ -56,14 +56,14 @@ void PoolingBaseCPUKernel::FreeQuantParam() { | |||
| } | |||
| int PoolingBaseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| return RET_OK; | |||
| } | |||
| MS_ASSERT(in_tensors_.size() == 1); | |||
| MS_ASSERT(out_tensors_.size() == 1); | |||
| pooling_param_->thread_num_ = thread_count_; | |||
| MS_ASSERT(this->op_parameter_ != nullptr); | |||
| return RET_OK; | |||
| } | |||
| int PoolingBaseCPUKernel::ReSize() { | |||
| auto in_tensor = this->in_tensors_.front(); | |||
| auto out_tensor = this->out_tensors_.front(); | |||
| MS_ASSERT(in_tensor != nullptr); | |||
| @@ -37,7 +37,7 @@ class PoolingBaseCPUKernel : public LiteKernel { | |||
| ~PoolingBaseCPUKernel() = default; | |||
| int Init() override; | |||
| int ReSize() override { return RET_OK; } | |||
| int ReSize() override; | |||
| int Run() override { return RET_OK; } | |||
| int SetQuantParam(); | |||
| void FreeQuantParam(); | |||
| @@ -98,38 +98,21 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { | |||
| } | |||
| int Convolution1x1FP16CPUKernel::Init() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return ret; | |||
| } | |||
| ret = InitMatmulParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init matmul param failed."; | |||
| return ret; | |||
| } | |||
| ret = InitConv1x1Param(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init conv1x1 param failed."; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return ret; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int Convolution1x1FP16CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| fp16_weight_ = nullptr; | |||
| } | |||
| if (input_ptr_ != nullptr) { | |||
| free(input_ptr_); | |||
| } | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| input_ptr_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| @@ -147,6 +130,11 @@ int Convolution1x1FP16CPUKernel::ReSize() { | |||
| MS_LOG(ERROR) << "Init conv1x1 param failed."; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -34,26 +34,32 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) { | |||
| matmul_param_ = new MatMulParameter(); | |||
| } | |||
| ~Convolution1x1FP16CPUKernel() override { | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| } | |||
| if (pack_input_ != nullptr) { | |||
| free(pack_input_); | |||
| } | |||
| delete matmul_param_; | |||
| } | |||
| ~Convolution1x1FP16CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitBuffer(); | |||
| int InitConv1x1Param(); | |||
| int InitMatmulParam(); | |||
| int InitWeightBias(); | |||
| void Pre1x1Trans(float16_t *src_input, float16_t *src_output); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| weight_ptr_ = nullptr; | |||
| } | |||
| if (matmul_param_ != nullptr) { | |||
| delete matmul_param_; | |||
| matmul_param_ = nullptr; | |||
| } | |||
| if (pack_input_ != nullptr) { | |||
| free(pack_input_); | |||
| } | |||
| } | |||
| bool pre_trans_input_ = false; | |||
| int thread_count_ = 0; | |||
| int thread_stride_ = 0; | |||
| @@ -157,52 +157,39 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int Convolution3x3FP16CPUKernel::Init() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int Convolution3x3FP16CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| tile_buffer_ = nullptr; | |||
| } | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| } | |||
| @@ -30,41 +30,49 @@ class Convolution3x3FP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~Convolution3x3FP16CPUKernel() override { | |||
| ~Convolution3x3FP16CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| fp16_weight_ = nullptr; | |||
| } | |||
| if (transformed_filter_addr_ != nullptr) { | |||
| free(transformed_filter_addr_); | |||
| transformed_filter_addr_ = nullptr; | |||
| } | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| tile_buffer_ = nullptr; | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| block_unit_buffer_ = nullptr; | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| tmp_dst_buffer_ = nullptr; | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| tmp_out_ = nullptr; | |||
| } | |||
| } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| float16_t *transformed_filter_addr_; | |||
| float16_t *tile_buffer_; | |||
| float16_t *block_unit_buffer_; | |||
| float16_t *tmp_dst_buffer_; | |||
| float16_t *tmp_out_; | |||
| float16_t *transformed_filter_addr_ = nullptr; | |||
| float16_t *tile_buffer_ = nullptr; | |||
| float16_t *block_unit_buffer_ = nullptr; | |||
| float16_t *tmp_dst_buffer_ = nullptr; | |||
| float16_t *tmp_out_ = nullptr; | |||
| }; | |||
| void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvParameter *conv_param); | |||
| } // namespace mindspore::kernel | |||
| @@ -43,9 +43,9 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel { | |||
| protected: | |||
| float16_t *fp16_weight_ = nullptr; | |||
| float16_t *execute_input_; // ctx allocator malloc and free | |||
| float16_t *execute_weight_; | |||
| float16_t *execute_output_; // ctx allocator malloc and free | |||
| float16_t *execute_input_ = nullptr; | |||
| float16_t *execute_weight_ = nullptr; | |||
| float16_t *execute_output_ = nullptr; | |||
| TypeId in_data_type_; | |||
| TypeId out_data_type_; | |||
| }; | |||
| @@ -29,8 +29,14 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { | |||
| delete sliding_; | |||
| ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { FreeTmpBuffer(); } | |||
| void ConvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() { | |||
| if (sliding_ != nullptr) { | |||
| delete sliding_; | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -102,6 +108,14 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| // conv base init | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| @@ -125,27 +139,6 @@ int ConvolutionDepthwiseFp16CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| InitSlidingParamConvDw(sliding_, conv_param_, C8NUM); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { | |||
| ConvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_, | |||
| sliding_, task_id); | |||
| @@ -51,10 +51,11 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| SlidingWindowParam *sliding_; | |||
| float16_t *packed_weight_; | |||
| float16_t *packed_input_; | |||
| float16_t *packed_output_; | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding_ = nullptr; | |||
| float16_t *packed_weight_ = nullptr; | |||
| float16_t *packed_input_ = nullptr; | |||
| float16_t *packed_output_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -135,46 +135,36 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionFP16CPUKernel::Init() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionFP16CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| } | |||
| @@ -29,33 +29,39 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionFP16CPUKernel() override { | |||
| ~ConvolutionFP16CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| fp16_weight_ = nullptr; | |||
| } | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| tmp_output_block_ = nullptr; | |||
| } | |||
| } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| float16_t *packed_input_; | |||
| float16_t *packed_weight_; | |||
| float16_t *tmp_output_block_; | |||
| float16_t *packed_input_ = nullptr; | |||
| float16_t *packed_weight_ = nullptr; | |||
| float16_t *tmp_output_block_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -139,6 +139,19 @@ void ConvolutionSWFP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionSWFP16CPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionSWFP16CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret; | |||
| @@ -162,31 +175,6 @@ int ConvolutionSWFP16CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionSWFP16CPUKernel::ReSize() { | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| } | |||
| delete slidingWindow_param_; | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return ret; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init sliding window param | |||
| slidingWindow_param_ = new SlidingWindowParam; | |||
| InitSlidingParamConv(slidingWindow_param_, conv_param_, C4NUM); | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionSWFP16CPUKernel::RunImpl(int task_id) { | |||
| ConvSWFp16(reinterpret_cast<float16_t *>(nhwc4_input_), packed_weight_, reinterpret_cast<float16_t *>(bias_data_), | |||
| tmp_output_block_, execute_output_, task_id, conv_param_, slidingWindow_param_); | |||
| @@ -28,18 +28,7 @@ class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionSWFP16CPUKernel() override { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| delete slidingWindow_param_; | |||
| } | |||
| ~ConvolutionSWFP16CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -51,9 +40,27 @@ class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| int ProcessFilter(); | |||
| private: | |||
| float16_t *packed_weight_; | |||
| float16_t *tmp_output_block_; | |||
| SlidingWindowParam *slidingWindow_param_; | |||
| void FreeTmpBuffer() { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| fp16_weight_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| tmp_output_block_ = nullptr; | |||
| } | |||
| if (slidingWindow_param_ != nullptr) { | |||
| delete slidingWindow_param_; | |||
| slidingWindow_param_ = nullptr; | |||
| } | |||
| } | |||
| float16_t *packed_weight_ = nullptr; | |||
| float16_t *tmp_output_block_ = nullptr; | |||
| SlidingWindowParam *slidingWindow_param_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -265,50 +265,17 @@ int ConvolutionWinogradFP16CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionWinogradFP16CPUKernel::Init() { | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| kernel_unit_ = conv_param_->kernel_h_; | |||
| input_unit_ = output_unit_ + kernel_unit_ - 1; | |||
| conv_param_->input_unit_ = input_unit_; | |||
| conv_param_->output_unit_ = output_unit_; | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // malloc tmp buffer | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ret = ConfigInputOutput(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConfigInputOutput failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionWinogradFP16CPUKernel::ReSize() { | |||
| if (tmp_data_ != nullptr) { | |||
| free(tmp_data_); | |||
| } | |||
| if (trans_input_ != nullptr) { | |||
| free(trans_input_); | |||
| } | |||
| if (gemm_out_ != nullptr) { | |||
| free(gemm_out_); | |||
| } | |||
| if (tmp_out_data_ != nullptr) { | |||
| free(tmp_out_data_); | |||
| } | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| @@ -321,6 +288,12 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() { | |||
| conv_param_->input_unit_ = input_unit_; | |||
| conv_param_->output_unit_ = output_unit_; | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // malloc tmp buffer | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| @@ -33,43 +33,52 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive, int out_unit) | |||
| : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(out_unit) {} | |||
| ~ConvolutionWinogradFP16CPUKernel() override { | |||
| ~ConvolutionWinogradFP16CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||
| int InitTmpBuffer(); | |||
| int ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (fp16_weight_ != nullptr) { | |||
| free(fp16_weight_); | |||
| fp16_weight_ = nullptr; | |||
| } | |||
| if (tmp_data_ != nullptr) { | |||
| free(tmp_data_); | |||
| tmp_data_ = nullptr; | |||
| } | |||
| if (trans_input_ != nullptr) { | |||
| free(trans_input_); | |||
| trans_input_ = nullptr; | |||
| } | |||
| if (gemm_out_ != nullptr) { | |||
| free(gemm_out_); | |||
| gemm_out_ = nullptr; | |||
| } | |||
| if (tmp_out_data_ != nullptr) { | |||
| free(tmp_out_data_); | |||
| tmp_out_data_ = nullptr; | |||
| } | |||
| if (trans_weight_ != nullptr) { | |||
| delete trans_weight_; | |||
| trans_weight_ = nullptr; | |||
| } | |||
| delete trans_weight_; | |||
| } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||
| int InitTmpBuffer(); | |||
| int ConfigInputOutput(); | |||
| private: | |||
| int kernel_unit_; | |||
| int input_unit_; | |||
| int output_unit_; | |||
| float16_t *tmp_data_; | |||
| float16_t *trans_input_; | |||
| float16_t *gemm_out_; | |||
| float16_t *tmp_out_data_; | |||
| Matrix *trans_weight_; | |||
| float16_t *tmp_data_ = nullptr; | |||
| float16_t *trans_input_ = nullptr; | |||
| float16_t *gemm_out_ = nullptr; | |||
| float16_t *tmp_out_data_ = nullptr; | |||
| Matrix *trans_weight_ = nullptr; | |||
| InputTransformUnitFp16Func input_trans_func_; | |||
| OutputTransformUnitFp16Func output_trans_func_; | |||
| TmpBufferAddressFp16 tmp_buffer_address_list_[4]; | |||
| @@ -28,8 +28,14 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { | |||
| delete sliding_; | |||
| DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { FreeTmpBuffer(); } | |||
| void DeconvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() { | |||
| if (sliding_ != nullptr) { | |||
| delete sliding_; | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -115,6 +121,15 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::Init() { | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| sliding_ = new SlidingWindowParam; | |||
| InitSlideParam(); | |||
| // conv base init | |||
| @@ -137,27 +152,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| InitSlideParam(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { | |||
| DeconvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_, | |||
| sliding_, task_id); | |||
| @@ -52,10 +52,11 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel | |||
| int Execute(int task_id); | |||
| private: | |||
| SlidingWindowParam *sliding_; | |||
| float16_t *packed_weight_; | |||
| float16_t *packed_input_; | |||
| float16_t *packed_output_; | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding_ = nullptr; | |||
| float16_t *packed_weight_ = nullptr; | |||
| float16_t *packed_input_ = nullptr; | |||
| float16_t *packed_output_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -32,7 +32,19 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() { | |||
| int DeConvolutionFp16CPUKernel::ReSize() { | |||
| FreeParam(); | |||
| InitParam(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitWeightBias(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitWeightBias error!"; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -141,24 +153,10 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { | |||
| } | |||
| int DeConvolutionFp16CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitWeightBias(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitWeightBias error!"; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int DeConvolutionFp16CPUKernel::Run() { | |||
| @@ -59,19 +59,23 @@ int PoolingFp16CPUKernel::Init() { | |||
| return ret; | |||
| } | |||
| ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init Buffer failed."; | |||
| return ret; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int PoolingFp16CPUKernel::ReSize() { | |||
| auto ret = Init(); | |||
| auto ret = PoolingBaseCPUKernel::ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Pooling resize init failed."; | |||
| return RET_ERROR; | |||
| MS_LOG(ERROR) << "PoolingBase ReSize fai1!ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init Buffer fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -131,40 +131,17 @@ void ConvolutionCPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // config input output | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionCPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| @@ -172,12 +149,19 @@ int ConvolutionCPUKernel::ReSize() { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // config input output | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| } | |||
| @@ -30,17 +30,7 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionCPUKernel() override { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| }; | |||
| ~ConvolutionCPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -51,9 +41,23 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||
| void ConfigInputOutput(); | |||
| private: | |||
| float *packed_input_; | |||
| float *packed_weight_; | |||
| float *tmp_output_block_; | |||
| void FreeTmpBuffer() { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| tmp_output_block_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| } | |||
| float *packed_input_ = nullptr; | |||
| float *packed_weight_ = nullptr; | |||
| float *tmp_output_block_ = nullptr; | |||
| GEMM_FUNC_FP32 gemm_func_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -23,6 +23,13 @@ using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { | |||
| FreeTmpBuffer(); | |||
| if (matmul_param_ != nullptr) { | |||
| delete matmul_param_; | |||
| } | |||
| } | |||
| void Convolution1x1CPUKernel::FreeTmpBuffer() { | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| weight_ptr_ = nullptr; | |||
| @@ -35,20 +42,23 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { | |||
| free(input_ptr_); | |||
| input_ptr_ = nullptr; | |||
| } | |||
| delete matmul_param_; | |||
| } | |||
| int Convolution1x1CPUKernel::ReSize() { | |||
| if (pack_input_ != nullptr) { | |||
| free(pack_input_); | |||
| pack_input_ = nullptr; | |||
| FreeTmpBuffer(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| InitConv1x1MatmulParam(); | |||
| int error_code = InitConv1x1BiasWeight(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Convolution base init failed."; | |||
| return error_code; | |||
| } | |||
| if (pre_trans_input_ && input_ptr_ != nullptr) { | |||
| free(input_ptr_); | |||
| input_ptr_ = nullptr; | |||
| error_code = InitConv1x1Param(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Convolution base init failed."; | |||
| return error_code; | |||
| } | |||
| InitConv1x1MatmulParam(); | |||
| InitConv1x1Param(); | |||
| return RET_OK; | |||
| } | |||
| @@ -125,24 +135,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) { | |||
| } | |||
| int Convolution1x1CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| InitConv1x1MatmulParam(); | |||
| int error_code = InitConv1x1BiasWeight(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Convolution base init failed."; | |||
| return error_code; | |||
| } | |||
| error_code = InitConv1x1Param(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "Convolution base init failed."; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int Convolution1x1CPUKernel::DoConv1x1(int task_id) { | |||
| @@ -52,6 +52,7 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int InitConv1x1BiasWeight(); | |||
| void InitConv1x1MatmulParam(); | |||
| void Pre1x1Trans(float *src_input, float *src_output); | |||
| void FreeTmpBuffer(); | |||
| private: | |||
| MatMulParameter *matmul_param_ = nullptr; | |||
| @@ -159,59 +159,34 @@ void Convolution3x3CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int Convolution3x3CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int Convolution3x3CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed.ret: " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| MS_LOG(ERROR) << "Init weight bias failed.ret: " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| MS_LOG(ERROR) << "Init tmp buffer failed.ret: " << ret; | |||
| return RET_ERROR; | |||
| } | |||
| ConfigInputOutput(); | |||
| return RET_OK; | |||
| } | |||
| int Convolution3x3CPUKernel::ReSize() { | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| } | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| } | |||
| if (nc4hw4_out_ != nullptr) { | |||
| free(nc4hw4_out_); | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Convolution3x3CPUKernel::RunImpl(int task_id) { | |||
| if (gemm_func_ == nullptr) { | |||
| MS_LOG(ERROR) << "gemm_func is nullptr."; | |||
| @@ -29,38 +29,45 @@ class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~Convolution3x3CPUKernel() override { | |||
| if (transformed_filter_addr_ != nullptr) { | |||
| free(transformed_filter_addr_); | |||
| } | |||
| ~Convolution3x3CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| tile_buffer_ = nullptr; | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| block_unit_buffer_ = nullptr; | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| tmp_dst_buffer_ = nullptr; | |||
| } | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| if (nc4hw4_out_ != nullptr) { | |||
| free(nc4hw4_out_); | |||
| nc4hw4_out_ = nullptr; | |||
| } | |||
| }; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| } | |||
| private: | |||
| float *transformed_filter_addr_; | |||
| float *tile_buffer_; | |||
| float *block_unit_buffer_; | |||
| float *tmp_dst_buffer_; | |||
| float *nc4hw4_out_; | |||
| float *transformed_filter_addr_ = nullptr; | |||
| float *tile_buffer_ = nullptr; | |||
| float *block_unit_buffer_ = nullptr; | |||
| float *tmp_dst_buffer_ = nullptr; | |||
| float *nc4hw4_out_ = nullptr; | |||
| TmpBufferAddress tmp_buffer_address_list_[4]; | |||
| GEMM_FUNC_FP32 gemm_func_ = nullptr; | |||
| }; | |||
| @@ -29,8 +29,14 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { | |||
| delete sliding_; | |||
| ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { FreeTmpBuffer(); } | |||
| void ConvolutionDepthwiseCPUKernel::FreeTmpBuffer() { | |||
| if (sliding_ != nullptr) { | |||
| delete sliding_; | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -105,10 +111,14 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() { | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -130,33 +140,6 @@ int ConvolutionDepthwiseCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::ReSize() { | |||
| if (need_align_) { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| // init sliding window param | |||
| sliding_ = new SlidingWindowParam; | |||
| InitSlidingParamConvDw(sliding_, conv_param_, C4NUM); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseCPUKernel::Execute(int task_id) { | |||
| ConvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, | |||
| sliding_, task_id); | |||
| @@ -40,6 +40,7 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding_ = nullptr; | |||
| float *packed_weight_ = nullptr; | |||
| float *packed_input_ = nullptr; | |||
| @@ -100,50 +100,56 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() { | |||
| } | |||
| int ConvolutionDepthwise3x3CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| return RET_OK; | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initWeightBias error!"; | |||
| return ret; | |||
| } | |||
| // init threadNum; | |||
| conv_param_->thread_num_ = MSMIN(thread_count_, UP_DIV(conv_param_->output_channel_, C4NUM)); | |||
| ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!"; | |||
| return ret; | |||
| } | |||
| // malloc one block buffer | |||
| block_buffer_ = reinterpret_cast<float *>(malloc(thread_count_ * 16 * C4NUM * sizeof(float))); | |||
| if (block_buffer_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc block buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionDepthwise3x3CPUKernel::ReSize() { | |||
| void ConvolutionDepthwise3x3CPUKernel::FreeTmpBufer() { | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| free(packed_output_); | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| free(trans_buffer_); | |||
| if (trans_buffer_ != nullptr) { | |||
| free(trans_buffer_); | |||
| trans_buffer_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| } | |||
| int ConvolutionDepthwise3x3CPUKernel::ReSize() { | |||
| FreeTmpBufer(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitBuffer(); | |||
| auto ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initWeightBias error!ret: " << ret; | |||
| return ret; | |||
| } | |||
| // init threadNum; | |||
| conv_param_->thread_num_ = MSMIN(thread_count_, UP_DIV(conv_param_->output_channel_, C4NUM)); | |||
| ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!"; | |||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!ret: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| @@ -31,13 +31,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionDepthwise3x3CPUKernel() override { | |||
| free(packed_weight_); | |||
| if (need_align_) { | |||
| free(packed_input_); | |||
| free(packed_output_); | |||
| FreeTmpBufer(); | |||
| if (block_buffer_ != nullptr) { | |||
| free(block_buffer_); | |||
| block_buffer_ = nullptr; | |||
| } | |||
| free(block_buffer_); | |||
| free(trans_buffer_); | |||
| }; | |||
| int Init() override; | |||
| @@ -49,6 +47,7 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| void FreeTmpBufer(); | |||
| float *packed_weight_ = nullptr; | |||
| float *packed_input_ = nullptr; | |||
| float *packed_output_ = nullptr; | |||
| @@ -107,10 +107,20 @@ void ConvolutionSWCPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionSWCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionSWCPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| @@ -136,32 +146,6 @@ int ConvolutionSWCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionSWCPUKernel::ReSize() { | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| } | |||
| delete slidingWindow_param_; | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init sliding window param | |||
| slidingWindow_param_ = new SlidingWindowParam; | |||
| InitSlidingParamConv(slidingWindow_param_, conv_param_, C4NUM); | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionSWCPUKernel::RunImpl(int task_id) { | |||
| auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data()); | |||
| ConvSWFp32(reinterpret_cast<float *>(nhwc4_input_), packed_weight_, reinterpret_cast<float *>(bias_data_), | |||
| @@ -32,15 +32,7 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionSWCPUKernel() override { | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| } | |||
| delete slidingWindow_param_; | |||
| }; | |||
| ~ConvolutionSWCPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -51,9 +43,23 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { | |||
| void ConfigInputOutput(); | |||
| private: | |||
| float *packed_weight_; | |||
| float *tmp_output_block_; | |||
| SlidingWindowParam *slidingWindow_param_; | |||
| void FreeTmpBuffer() { | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| if (tmp_output_block_ != nullptr) { | |||
| free(tmp_output_block_); | |||
| tmp_output_block_ = nullptr; | |||
| } | |||
| if (slidingWindow_param_ != nullptr) { | |||
| delete slidingWindow_param_; | |||
| slidingWindow_param_ = nullptr; | |||
| } | |||
| } | |||
| float *packed_weight_ = nullptr; | |||
| float *tmp_output_block_ = nullptr; | |||
| SlidingWindowParam *slidingWindow_param_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_SLIDEWINDOW_H_ | |||
| @@ -245,54 +245,17 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionWinogradCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| kernel_unit_ = conv_param_->kernel_h_; | |||
| input_unit_ = output_unit_ + kernel_unit_ - 1; | |||
| conv_param_->input_unit_ = input_unit_; | |||
| conv_param_->output_unit_ = output_unit_; | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // malloc tmp buffer | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = ConfigInputOutput(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConfigInputOutput failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionWinogradCPUKernel::ReSize() { | |||
| if (tmp_data_ != nullptr) { | |||
| free(tmp_data_); | |||
| } | |||
| if (trans_input_ != nullptr) { | |||
| free(trans_input_); | |||
| } | |||
| if (gemm_out_ != nullptr) { | |||
| free(gemm_out_); | |||
| } | |||
| if (tmp_out_data_ != nullptr) { | |||
| free(tmp_out_data_); | |||
| } | |||
| FreeTmpBuffer(); | |||
| if (nhwc4_input_ != nullptr) { | |||
| free(nhwc4_input_); | |||
| nhwc4_input_ = nullptr; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| @@ -305,6 +268,12 @@ int ConvolutionWinogradCPUKernel::ReSize() { | |||
| conv_param_->input_unit_ = input_unit_; | |||
| conv_param_->output_unit_ = output_unit_; | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // malloc tmp buffer | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| @@ -30,40 +30,51 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||
| ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | |||
| const lite::Primitive *primitive, int output_unit) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {} | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit), | |||
| trans_weight_(nullptr) {} | |||
| ~ConvolutionWinogradCPUKernel() override { | |||
| FreeTmpBuffer(); | |||
| }; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||
| int InitTmpBuffer(); | |||
| int ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (tmp_data_ != nullptr) { | |||
| free(tmp_data_); | |||
| tmp_data_ = nullptr; | |||
| } | |||
| if (trans_input_ != nullptr) { | |||
| free(trans_input_); | |||
| trans_input_ = nullptr; | |||
| } | |||
| if (gemm_out_ != nullptr) { | |||
| free(gemm_out_); | |||
| gemm_out_ = nullptr; | |||
| } | |||
| if (tmp_out_data_ != nullptr) { | |||
| free(tmp_out_data_); | |||
| tmp_out_data_ = nullptr; | |||
| } | |||
| delete trans_weight_; | |||
| }; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| int InitWeightBias(); | |||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||
| int InitTmpBuffer(); | |||
| int ConfigInputOutput(); | |||
| private: | |||
| if (trans_weight_ != nullptr) { | |||
| delete trans_weight_; | |||
| trans_weight_ = nullptr; | |||
| } | |||
| } | |||
| int kernel_unit_; | |||
| int input_unit_; | |||
| int output_unit_; | |||
| float *tmp_data_; | |||
| float *trans_input_; | |||
| float *gemm_out_; | |||
| float *tmp_out_data_; | |||
| Matrix *trans_weight_; | |||
| float *tmp_data_ = nullptr; | |||
| float *trans_input_ = nullptr; | |||
| float *gemm_out_ = nullptr; | |||
| float *tmp_out_data_ = nullptr; | |||
| Matrix *trans_weight_ = nullptr; | |||
| InputTransformUnitFunc input_trans_func_; | |||
| OutputTransformUnitFunc output_trans_func_; | |||
| TmpBufferAddress tmp_buffer_address_list_[5]; | |||
| @@ -25,7 +25,9 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeConv2D; | |||
| namespace mindspore::kernel { | |||
| DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { | |||
| DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { FreeTmpBuffer(); } | |||
| void DeConvolutionCPUKernel::FreeTmpBuffer() { | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| weight_ptr_ = nullptr; | |||
| @@ -42,24 +44,23 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { | |||
| free(pack_output_); | |||
| pack_output_ = nullptr; | |||
| } | |||
| return; | |||
| } | |||
| int DeConvolutionCPUKernel::ReSize() { | |||
| if (tmp_buffer_ != nullptr) { | |||
| free(tmp_buffer_); | |||
| tmp_buffer_ = nullptr; | |||
| } | |||
| if (pack_input_ != nullptr) { | |||
| free(pack_input_); | |||
| pack_input_ = nullptr; | |||
| } | |||
| if (pack_output_ != nullptr) { | |||
| free(pack_output_); | |||
| pack_output_ = nullptr; | |||
| FreeTmpBuffer(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitParam error!ret: " << error_code; | |||
| return error_code; | |||
| } | |||
| InitParam(); | |||
| error_code = InitWeightBias(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -151,24 +152,10 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) { | |||
| } | |||
| int DeConvolutionCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitWeightBias(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv InitWeightBias error!"; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int DeConvolutionCPUKernel::Run() { | |||
| @@ -47,20 +47,21 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||
| private: | |||
| int InitParam(); | |||
| int InitWeightBias(); | |||
| void FreeTmpBuffer(); | |||
| private: | |||
| MatMulParameter *matmul_param_; | |||
| MatMulParameter *matmul_param_ = nullptr; | |||
| int input_plane_; | |||
| int kernel_plane_; | |||
| int output_plane_; | |||
| int thread_count_; | |||
| int thread_stride_; | |||
| float *weight_ptr_; | |||
| float *pack_input_; | |||
| float *pack_output_; | |||
| float *tmp_buffer_; | |||
| float *input_ptr_; | |||
| float *output_ptr_; | |||
| float *weight_ptr_ = nullptr; | |||
| float *pack_input_ = nullptr; | |||
| float *pack_output_ = nullptr; | |||
| float *tmp_buffer_ = nullptr; | |||
| float *input_ptr_ = nullptr; | |||
| float *output_ptr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ | |||
| @@ -27,8 +27,14 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { | |||
| delete sliding_; | |||
| DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { FreeTmpBuffer(); } | |||
| void DeconvolutionDepthwiseCPUKernel::FreeTmpBuffer() { | |||
| if (sliding_ != nullptr) { | |||
| delete sliding_; | |||
| sliding_ = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -120,48 +126,28 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { | |||
| } | |||
| int DeconvolutionDepthwiseCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| InitSlideParam(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitWeightBias(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed."; | |||
| return RET_ERROR; | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.ret: " << ret; | |||
| return ret; | |||
| } | |||
| ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||
| if (need_align_) { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| InitSlideParam(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != 0) { | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed."; | |||
| return RET_ERROR; | |||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -41,10 +41,11 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| SlidingWindowParam *sliding_; | |||
| float *packed_weight_; | |||
| float *packed_input_; | |||
| float *packed_output_; | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding_ = nullptr; | |||
| float *packed_weight_ = nullptr; | |||
| float *packed_input_ = nullptr; | |||
| float *packed_output_ = nullptr; | |||
| bool need_align_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -28,13 +28,10 @@ using mindspore::schema::PrimitiveType_Flatten; | |||
| namespace mindspore::kernel { | |||
| int FlattenCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ReSize(); | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int FlattenCPUKernel::ReSize() { | |||
| @@ -99,10 +99,14 @@ int LstmCPUKernel::InitWeightBias() { | |||
| } | |||
| int LstmCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int LstmCPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| auto ret = InitParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "LstmCPUKernel InitParam error."; | |||
| @@ -123,23 +127,6 @@ int LstmCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int LstmCPUKernel::ReSize() { | |||
| free(gate_buffer_); | |||
| auto ret = InitParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "LstmCPUKernel InitParam error."; | |||
| return RET_ERROR; | |||
| } | |||
| ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "LstmCPUKernel InitBuffer error."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int LstmCPUKernel::Run() { | |||
| auto prepare_ret = Prepare(); | |||
| if (prepare_ret != RET_OK) { | |||
| @@ -170,13 +157,16 @@ int LstmCPUKernel::Run() { | |||
| } | |||
| kernel::LiteKernel *CpuLstmKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | |||
| const lite::Context *ctx, const kernel::KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(opParameter != nullptr); | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "Input parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == schema::PrimitiveType_Lstm); | |||
| auto *kernel = new (std::nothrow) LstmCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||
| auto *kernel = new (std::nothrow) LstmCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| MS_LOG(ERROR) << "kernel is nullptr."; | |||
| return nullptr; | |||
| @@ -184,8 +174,8 @@ kernel::LiteKernel *CpuLstmKernelCreator(const std::vector<lite::tensor::Tensor | |||
| auto ret = kernel->Init(); | |||
| if (ret != RET_OK) { | |||
| delete kernel; | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||
| MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: " | |||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_)); | |||
| return nullptr; | |||
| } | |||
| return kernel; | |||
| @@ -31,12 +31,7 @@ class LstmCPUKernel : public LiteKernel { | |||
| lstm_parm_ = reinterpret_cast<LstmParameter *>(op_parameter_); | |||
| } | |||
| ~LstmCPUKernel() override { | |||
| free(gate_buffer_); | |||
| free(weight_i_ptr_); | |||
| free(weight_h_ptr_); | |||
| free(bias_ptr_); | |||
| } | |||
| ~LstmCPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -47,11 +42,29 @@ class LstmCPUKernel : public LiteKernel { | |||
| int InitWeightBias(); | |||
| private: | |||
| float *gate_buffer_; | |||
| float *weight_i_ptr_; | |||
| float *weight_h_ptr_; | |||
| float *bias_ptr_; | |||
| LstmParameter *lstm_parm_; | |||
| void FreeTmpBuffer() { | |||
| if (gate_buffer_ != nullptr) { | |||
| free(gate_buffer_); | |||
| gate_buffer_ = nullptr; | |||
| } | |||
| if (weight_i_ptr_ != nullptr) { | |||
| free(weight_i_ptr_); | |||
| weight_i_ptr_ = nullptr; | |||
| } | |||
| if (weight_h_ptr_ != nullptr) { | |||
| free(weight_h_ptr_); | |||
| weight_h_ptr_ = nullptr; | |||
| } | |||
| if (bias_ptr_ != nullptr) { | |||
| free(bias_ptr_); | |||
| bias_ptr_ = nullptr; | |||
| } | |||
| } | |||
| float *gate_buffer_ = nullptr; | |||
| float *weight_i_ptr_ = nullptr; | |||
| float *weight_h_ptr_ = nullptr; | |||
| float *bias_ptr_ = nullptr; | |||
| LstmParameter *lstm_parm_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -29,23 +29,22 @@ using mindspore::schema::PrimitiveType_Pooling; | |||
| namespace mindspore::kernel { | |||
| int PoolingCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| return RET_OK; | |||
| } | |||
| auto ret = PoolingBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "PoolingBase Init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int PoolingCPUKernel::ReSize() { | |||
| auto ret = Init(); | |||
| auto ret = PoolingBaseCPUKernel::ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Pooling resize init failed."; | |||
| return RET_ERROR; | |||
| MS_LOG(ERROR) << "PoolingBase ReSize fail!ret: " << ret; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -25,10 +25,15 @@ using mindspore::schema::PrimitiveType_TopK; | |||
| namespace mindspore::kernel { | |||
| int TopKCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||
| parameter->topk_node_list_ = nullptr; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int TopKCPUKernel::ReSize() { | |||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||
| lite::tensor::Tensor *input = in_tensors_.at(0); | |||
| parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | |||
| @@ -37,6 +42,10 @@ int TopKCPUKernel::Init() { | |||
| parameter->loop_num_ *= input->shape()[i]; | |||
| } | |||
| if (parameter->topk_node_list_ != nullptr) { | |||
| free(parameter->topk_node_list_); | |||
| parameter->topk_node_list_ = nullptr; | |||
| } | |||
| parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_); | |||
| if (parameter->topk_node_list_ == nullptr) { | |||
| MS_LOG(ERROR) << "malloc fail."; | |||
| @@ -45,8 +54,6 @@ int TopKCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int TopKCPUKernel::ReSize() { return RET_OK; } | |||
| int TopKCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| @@ -65,7 +72,10 @@ kernel::LiteKernel *CpuTopKFp32KernelCreator(const std::vector<lite::tensor::Ten | |||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | |||
| const lite::Context *ctx, const KernelKey &desc, | |||
| const lite::Primitive *primitive) { | |||
| MS_ASSERT(parameter != nullptr); | |||
| if (parameter == nullptr) { | |||
| MS_LOG(ERROR) << "input parameter is nullptr!"; | |||
| return nullptr; | |||
| } | |||
| MS_ASSERT(desc.type == PrimitiveType_TopK); | |||
| auto *kernel = new (std::nothrow) TopKCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||
| if (kernel == nullptr) { | |||
| @@ -24,10 +24,13 @@ using mindspore::schema::PrimitiveType_Unstack; | |||
| namespace mindspore::kernel { | |||
| int UnstackCPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int UnstackCPUKernel::ReSize() { | |||
| auto input = in_tensors_.at(0); | |||
| MS_ASSERT(input != nullptr); | |||
| size_t shape_size = input->shape().size(); | |||
| @@ -48,7 +51,10 @@ int UnstackCPUKernel::Init() { | |||
| para->axis_dim_ = input->DimensionSize(i); | |||
| } | |||
| } | |||
| if (output_addr_array_ != nullptr) { | |||
| free(output_addr_array_); | |||
| output_addr_array_ = nullptr; | |||
| } | |||
| output_addr_array_ = reinterpret_cast<float **>(malloc(sizeof(float *) * out_tensors_.size())); | |||
| if (output_addr_array_ == nullptr) { | |||
| MS_LOG(ERROR) << "Failed to malloc memory"; | |||
| @@ -57,8 +63,6 @@ int UnstackCPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int UnstackCPUKernel::ReSize() { return RET_OK; } | |||
| int UnstackCPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| @@ -34,7 +34,7 @@ class UnstackCPUKernel : public LiteKernel { | |||
| int Run() override; | |||
| private: | |||
| float **output_addr_array_; | |||
| float **output_addr_array_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -28,11 +28,6 @@ using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| int ArithmeticSelfInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| return RET_OK; | |||
| } | |||
| int ret = ReSize(); | |||
| auto *input_tensor = in_tensors_.at(kInputIndex); | |||
| auto in_quant_args = input_tensor->GetQuantParams(); | |||
| para_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; | |||
| @@ -57,7 +52,10 @@ int ArithmeticSelfInt8CPUKernel::Init() { | |||
| para_->quant_arg_.shift_right_ = right_shift > 0 ? right_shift : 0; | |||
| } | |||
| return ret; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ArithmeticSelfInt8CPUKernel::ReSize() { | |||
| @@ -26,10 +26,13 @@ using mindspore::schema::PrimitiveType_BiasAdd; | |||
| namespace mindspore::kernel { | |||
| int BiasAddInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int BiasAddInt8CPUKernel::ReSize() { | |||
| auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||
| auto dims = in_tensors_[0]->shape(); | |||
| bias_param->ndim_ = dims.size(); | |||
| @@ -39,11 +42,9 @@ int BiasAddInt8CPUKernel::Init() { | |||
| bias_param->out_shape_[i] = dims[i]; | |||
| } | |||
| bias_param->in_shape1_[3] = dims[3]; | |||
| return NNACL_OK; | |||
| return RET_OK; | |||
| } | |||
| int BiasAddInt8CPUKernel::ReSize() { return NNACL_OK; } | |||
| int BiasAddInt8CPUKernel::Run() { | |||
| auto ret = Prepare(); | |||
| if (ret != RET_OK) { | |||
| @@ -43,28 +43,36 @@ void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParamete | |||
| free(tmp_addr); | |||
| } | |||
| Convolution3x3Int8CPUKernel::~Convolution3x3Int8CPUKernel() { | |||
| void Convolution3x3Int8CPUKernel::FreeTmpBuffer() { | |||
| if (transformed_filter_addr_ != nullptr) { | |||
| free(transformed_filter_addr_); | |||
| transformed_filter_addr_ = nullptr; | |||
| } | |||
| if (input_data_ != nullptr) { | |||
| free(input_data_); | |||
| input_data_ = nullptr; | |||
| } | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| tile_buffer_ = nullptr; | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| block_unit_buffer_ = nullptr; | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| tmp_dst_buffer_ = nullptr; | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| tmp_out_ = nullptr; | |||
| } | |||
| FreeQuantParam(); | |||
| } | |||
| Convolution3x3Int8CPUKernel::~Convolution3x3Int8CPUKernel() { FreeTmpBuffer(); } | |||
| int Convolution3x3Int8CPUKernel::InitWeightBias() { | |||
| auto input_channel = conv_param_->input_channel_; | |||
| auto output_channel = conv_param_->output_channel_; | |||
| @@ -161,10 +169,15 @@ void Convolution3x3Int8CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int Convolution3x3Int8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int Convolution3x3Int8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| @@ -191,37 +204,6 @@ int Convolution3x3Int8CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int Convolution3x3Int8CPUKernel::ReSize() { | |||
| if (input_data_ != nullptr) { | |||
| free(input_data_); | |||
| } | |||
| if (tile_buffer_ != nullptr) { | |||
| free(tile_buffer_); | |||
| } | |||
| if (block_unit_buffer_ != nullptr) { | |||
| free(block_unit_buffer_); | |||
| } | |||
| if (tmp_dst_buffer_ != nullptr) { | |||
| free(tmp_dst_buffer_); | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Convolution3x3Int8CPUKernel::RunImpl(int task_id) { | |||
| auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->Data()); | |||
| Conv3x3Int8(input_data_, transformed_filter_addr_, reinterpret_cast<int32_t *>(bias_data_), output_addr, tile_buffer_, | |||
| @@ -41,12 +41,13 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| void ConfigInputOutput(); | |||
| private: | |||
| int16_t *transformed_filter_addr_; | |||
| int16_t *input_data_; | |||
| int16_t *tile_buffer_; | |||
| int16_t *block_unit_buffer_; | |||
| int32_t *tmp_dst_buffer_; | |||
| int8_t *tmp_out_; | |||
| void FreeTmpBuffer(); | |||
| int16_t *transformed_filter_addr_ = nullptr; | |||
| int16_t *input_data_ = nullptr; | |||
| int16_t *tile_buffer_ = nullptr; | |||
| int16_t *block_unit_buffer_ = nullptr; | |||
| int32_t *tmp_dst_buffer_ = nullptr; | |||
| int8_t *tmp_out_ = nullptr; | |||
| }; | |||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | |||
| } // namespace mindspore::kernel | |||
| @@ -28,8 +28,12 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||
| delete sliding; | |||
| void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||
| if (sliding != nullptr) { | |||
| delete sliding; | |||
| sliding = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -46,6 +50,8 @@ ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||
| } | |||
| } | |||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); } | |||
| int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | |||
| // init weight, int8 -> int16 | |||
| // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | |||
| @@ -99,10 +105,15 @@ int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| @@ -132,35 +143,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (need_align_) { | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| // init sliding window param | |||
| InitSlidingParamConvDw(sliding, conv_param_, C4NUM); | |||
| // init quant param | |||
| ConvolutionBaseCPUKernel::SetQuantParam(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int ConvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | |||
| ConvDwInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_, | |||
| sliding, task_id); | |||
| @@ -40,10 +40,11 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| SlidingWindowParam *sliding; | |||
| int16_t *packed_weight_; | |||
| int16_t *packed_input_; | |||
| int8_t *packed_output_; | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding = nullptr; | |||
| int16_t *packed_weight_ = nullptr; | |||
| int16_t *packed_input_ = nullptr; | |||
| int8_t *packed_output_ = nullptr; | |||
| bool need_align_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -293,46 +293,10 @@ void ConvolutionInt8CPUKernel::ConfigInputOutput() { | |||
| } | |||
| int ConvolutionInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // config input output | |||
| ConfigInputOutput(); | |||
| CheckSupportOptimize(); | |||
| ret = SetQuantParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set quant param failed."; | |||
| return ret; | |||
| } | |||
| // init for opt | |||
| if (support_optimize_) { | |||
| ret = InitOpt(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Initialization for optimized int8 conv failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| // init for situation that not support sdot | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int ConvolutionInt8CPUKernel::InitOpt() { | |||
| @@ -351,32 +315,37 @@ int ConvolutionInt8CPUKernel::InitOpt() { | |||
| } | |||
| int ConvolutionInt8CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| } | |||
| if (input_sum_ != nullptr) { | |||
| free(input_sum_); | |||
| } | |||
| if (tmp_dst_ != nullptr) { | |||
| free(tmp_dst_); | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| } | |||
| FreeTmpBuffer(); | |||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // config input output | |||
| ConfigInputOutput(); | |||
| CheckSupportOptimize(); | |||
| ret = SetQuantParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set quant param failed."; | |||
| return ret; | |||
| } | |||
| // init for opt | |||
| if (support_optimize_) { | |||
| ret = InitTmpBufferOpt(); | |||
| ret = InitOpt(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init tmp buffer for opt failed."; | |||
| MS_LOG(ERROR) << "Initialization for optimized int8 conv failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| // init for situation that not support sdot | |||
| ret = InitWeightBias(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||
| return RET_ERROR; | |||
| } | |||
| // init tmp input, output | |||
| ret = InitTmpBuffer(); | |||
| if (ret != RET_OK) { | |||
| @@ -30,38 +30,44 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~ConvolutionInt8CPUKernel() override { | |||
| ~ConvolutionInt8CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| void CheckSupportOptimize(); | |||
| int InitOpt(); | |||
| int InitWeightBiasOpt(); | |||
| int InitTmpBufferOpt(); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (packed_weight_ != nullptr) { | |||
| free(packed_weight_); | |||
| packed_weight_ = nullptr; | |||
| } | |||
| if (packed_input_ != nullptr) { | |||
| free(packed_input_); | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (input_sum_ != nullptr) { | |||
| free(input_sum_); | |||
| input_sum_ = nullptr; | |||
| } | |||
| if (tmp_dst_ != nullptr) { | |||
| free(tmp_dst_); | |||
| tmp_dst_ = nullptr; | |||
| } | |||
| if (tmp_out_ != nullptr) { | |||
| free(tmp_out_); | |||
| tmp_out_ = nullptr; | |||
| } | |||
| FreeQuantParam(); | |||
| }; | |||
| int Init() override; | |||
| int ReSize() override; | |||
| int Run() override; | |||
| int RunImpl(int task_id); | |||
| void CheckSupportOptimize(); | |||
| int InitOpt(); | |||
| int InitWeightBiasOpt(); | |||
| int InitTmpBufferOpt(); | |||
| int InitWeightBias(); | |||
| int InitTmpBuffer(); | |||
| void ConfigInputOutput(); | |||
| private: | |||
| } | |||
| bool support_optimize_ = true; | |||
| int8_t *packed_weight_ = nullptr; | |||
| int8_t *packed_input_ = nullptr; | |||
| @@ -28,8 +28,13 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | |||
| namespace mindspore::kernel { | |||
| DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { | |||
| delete sliding; | |||
| DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); } | |||
| void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||
| if (sliding != nullptr) { | |||
| delete sliding; | |||
| sliding = nullptr; | |||
| } | |||
| if (packed_weight_ != nullptr) { | |||
| delete packed_weight_; | |||
| packed_weight_ = nullptr; | |||
| @@ -137,10 +142,15 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| sliding = new SlidingWindowParam; | |||
| InitSlideParam(); | |||
| @@ -169,35 +179,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||
| if (packed_input_ != nullptr) { | |||
| delete packed_input_; | |||
| packed_input_ = nullptr; | |||
| } | |||
| if (need_align_) { | |||
| if (packed_output_ != nullptr) { | |||
| delete packed_output_; | |||
| packed_output_ = nullptr; | |||
| } | |||
| } | |||
| if (output_buffer_ != nullptr) { | |||
| delete output_buffer_; | |||
| output_buffer_ = nullptr; | |||
| } | |||
| InitSlideParam(); | |||
| // conv base init | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| auto ret = InitBuffer(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!"; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | |||
| DeconvDwInt8(packed_output_, output_buffer_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | |||
| conv_param_, sliding, task_id); | |||
| @@ -41,11 +41,12 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int Execute(int task_id); | |||
| private: | |||
| SlidingWindowParam *sliding; | |||
| int16_t *packed_weight_; | |||
| int16_t *packed_input_; | |||
| int8_t *packed_output_; | |||
| int32_t *output_buffer_; | |||
| void FreeTmpBuffer(); | |||
| SlidingWindowParam *sliding = nullptr; | |||
| int16_t *packed_weight_ = nullptr; | |||
| int16_t *packed_input_ = nullptr; | |||
| int8_t *packed_output_ = nullptr; | |||
| int32_t *output_buffer_ = nullptr; | |||
| bool need_align_ = false; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -27,7 +27,9 @@ using mindspore::lite::RET_OK; | |||
| using mindspore::schema::PrimitiveType_DeConv2D; | |||
| namespace mindspore::kernel { | |||
| DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { | |||
| DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { FreeTmpBuffer(); } | |||
| void DeConvInt8CPUKernel::FreeTmpBuffer() { | |||
| if (weight_ptr_ != nullptr) { | |||
| free(weight_ptr_); | |||
| weight_ptr_ = nullptr; | |||
| @@ -47,7 +49,35 @@ DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { | |||
| ConvolutionBaseCPUKernel::FreeQuantParam(); | |||
| } | |||
| int DeConvInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int DeConvInt8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = ConvolutionBaseCPUKernel::SetQuantParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 SetQuantParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitBiasWeight(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitBiasWeight error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitData(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitData error!"; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int DeConvInt8CPUKernel::InitParam() { | |||
| fc_param_ = new MatMulParameter(); | |||
| @@ -115,35 +145,10 @@ int DeConvInt8CPUKernel::InitData() { | |||
| } | |||
| int DeConvInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| ConvolutionBaseCPUKernel::Init(); | |||
| int error_code = ConvolutionBaseCPUKernel::SetQuantParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 SetQuantParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitParam(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitParam error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitBiasWeight(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitBiasWeight error!"; | |||
| return error_code; | |||
| } | |||
| error_code = InitData(); | |||
| if (error_code != RET_OK) { | |||
| MS_LOG(ERROR) << "deconv int8 InitData error!"; | |||
| return error_code; | |||
| } | |||
| return RET_OK; | |||
| return ReSize(); | |||
| } | |||
| int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | |||
| @@ -51,12 +51,13 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||
| int InitBiasWeight(); | |||
| private: | |||
| MatMulParameter *fc_param_; | |||
| int8_t *weight_ptr_; | |||
| int8_t *input_ptr_; /* record c8 input*/ | |||
| int32_t *tmp_buffer_; /* record matmul result */ | |||
| int32_t *tmp_output_; /* record post c8 result */ | |||
| int8_t *output_ptr_; | |||
| void FreeTmpBuffer(); | |||
| MatMulParameter *fc_param_ = nullptr; | |||
| int8_t *weight_ptr_ = nullptr; | |||
| int8_t *input_ptr_ = nullptr; /* record c8 input*/ | |||
| int32_t *tmp_buffer_ = nullptr; /* record matmul result */ | |||
| int32_t *tmp_output_ = nullptr; /* record post c8 result */ | |||
| int8_t *output_ptr_ = nullptr; | |||
| size_t thread_count_; | |||
| size_t thread_stride_; | |||
| }; | |||
| @@ -25,10 +25,14 @@ using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| int FullconnectionInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int FullconnectionInt8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| fc_param_->row_ = (in_tensors_[0]->shape())[0]; | |||
| fc_param_->col_ = (in_tensors_[1]->shape())[0]; | |||
| fc_param_->deep_ = (in_tensors_[1]->shape())[1]; | |||
| @@ -92,8 +96,6 @@ int FullconnectionInt8CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int FullconnectionInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int FullconnectionInt8CPUKernel::RunImpl(int task_id) { | |||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(fc_param_->col_8_, 8) - task_id * thread_stride_); | |||
| if (cur_oc <= 0) { | |||
| @@ -31,11 +31,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { | |||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | |||
| const lite::Primitive *primitive) | |||
| : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | |||
| ~FullconnectionInt8CPUKernel() override { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| } | |||
| ~FullconnectionInt8CPUKernel() override { FreeTmpBuffer(); } | |||
| int Init() override; | |||
| int ReSize() override; | |||
| @@ -43,11 +39,29 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { | |||
| int RunImpl(int task_id); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (a_c8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| a_c8_ptr_ = nullptr; | |||
| } | |||
| if (b_r8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| b_r8_ptr_ = nullptr; | |||
| } | |||
| if (c_r8x8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| c_r8x8_ptr_ = nullptr; | |||
| } | |||
| if (bias_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(bias_ptr_); | |||
| bias_ptr_ = nullptr; | |||
| } | |||
| } | |||
| MatmulQuantArg quant_params_; | |||
| int8_t *a_c8_ptr_; | |||
| int8_t *b_r8_ptr_; | |||
| int *c_r8x8_ptr_; | |||
| int *bias_ptr_; | |||
| int8_t *a_c8_ptr_ = nullptr; | |||
| int8_t *b_r8_ptr_ = nullptr; | |||
| int *c_r8x8_ptr_ = nullptr; | |||
| int *bias_ptr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -24,17 +24,17 @@ using mindspore::lite::RET_MEMORY_FAILED; | |||
| using mindspore::lite::RET_OK; | |||
| namespace mindspore::kernel { | |||
| MatmulInt8CPUKernel::~MatmulInt8CPUKernel() { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| } | |||
| MatmulInt8CPUKernel::~MatmulInt8CPUKernel() { FreeTmpBuffer(); } | |||
| int MatmulInt8CPUKernel::Init() { | |||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||
| set_need_reinit(); | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int MatmulInt8CPUKernel::ReSize() { | |||
| FreeTmpBuffer(); | |||
| int batch = 1; | |||
| auto x_shape = in_tensors_[0]->shape(); | |||
| auto o_shape = out_tensors_[0]->shape(); | |||
| @@ -88,8 +88,6 @@ int MatmulInt8CPUKernel::Init() { | |||
| return RET_OK; | |||
| } | |||
| int MatmulInt8CPUKernel::ReSize() { return RET_OK; } | |||
| int MatmulInt8CPUKernel::RunImpl(int task_id) { | |||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_); | |||
| if (cur_oc <= 0) { | |||
| @@ -38,10 +38,24 @@ class MatmulInt8CPUKernel : public MatmulBaseCPUKernel { | |||
| int RunImpl(int task_id); | |||
| private: | |||
| void FreeTmpBuffer() { | |||
| if (a_c8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(a_c8_ptr_); | |||
| a_c8_ptr_ = nullptr; | |||
| } | |||
| if (b_r8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(b_r8_ptr_); | |||
| b_r8_ptr_ = nullptr; | |||
| } | |||
| if (c_r8x8_ptr_ != nullptr) { | |||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||
| c_r8x8_ptr_ = nullptr; | |||
| } | |||
| } | |||
| MatmulQuantArg quant_params_; | |||
| int8_t *a_c8_ptr_; | |||
| int8_t *b_r8_ptr_; | |||
| int *c_r8x8_ptr_; | |||
| int8_t *a_c8_ptr_ = nullptr; | |||
| int8_t *b_r8_ptr_ = nullptr; | |||
| int *c_r8x8_ptr_ = nullptr; | |||
| }; | |||
| } // namespace mindspore::kernel | |||
| @@ -40,21 +40,24 @@ int PoolingInt8CPUKernel::Init() { | |||
| MS_LOG(ERROR) << "Set pooling quant param failed."; | |||
| return RET_ERROR; | |||
| } | |||
| return RET_OK; | |||
| if (!InferShapeDone()) { | |||
| return RET_OK; | |||
| } | |||
| return ReSize(); | |||
| } | |||
| int PoolingInt8CPUKernel::ReSize() { | |||
| FreeQuantParam(); | |||
| auto ret = PoolingBaseCPUKernel::Init(); | |||
| auto ret = PoolingBaseCPUKernel::ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "PoolingBase Init failed."; | |||
| return RET_ERROR; | |||
| return ret; | |||
| } | |||
| SetQuantParam(); | |||
| ret = SetQuantParam(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "Set pooling quant param failed."; | |||
| return RET_ERROR; | |||
| return ret; | |||
| } | |||
| return RET_OK; | |||
| } | |||
| @@ -45,6 +45,33 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<tensor::Tensor *> | |||
| return RET_OK; | |||
| } | |||
| int Scheduler::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) { | |||
| for (size_t i = 0; i < kernels.size(); ++i) { | |||
| if (kernels[i] == nullptr) { | |||
| MS_LOG(ERROR) << "input kernel is nullptr!"; | |||
| return RET_ERROR; | |||
| } | |||
| auto primitive = const_cast<lite::Primitive *>(kernels[i]->GetPrimitive()); | |||
| if (primitive == nullptr) { | |||
| MS_LOG(ERROR) << "kernel(" << kernels[i]->name() << ")'s primitive is nullptr!"; | |||
| return RET_ERROR; | |||
| } | |||
| std::vector<tensor::Tensor *> &inputs = kernels[i]->in_tensors(); | |||
| std::vector<tensor::Tensor *> &outputs = kernels[i]->out_tensors(); | |||
| auto ret = primitive->InferShape(inputs, outputs); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "InferShape failed, name: " << kernels[i]->name() << ", ret = " << ret; | |||
| return ret; | |||
| } | |||
| ret = kernels[i]->ReSize(); | |||
| if (ret != RET_OK) { | |||
| MS_LOG(ERROR) << "kernel " << kernels[i]->name() << " resize fail!ret = " << ret; | |||
| return ret; | |||
| } | |||
| } | |||
| return RET_OK; | |||
| } | |||
| int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | |||
| std::vector<kernel::LiteKernel *> *kernels) { | |||
| MS_EXCEPTION_IF_NULL(model); | |||
| @@ -29,6 +29,8 @@ class Scheduler { | |||
| int Schedule(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | |||
| std::vector<kernel::LiteKernel *> *kernels); | |||
| int ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels); | |||
| protected: | |||
| kernel::LiteKernel *ScheduleNode(const std::vector<tensor::Tensor *> &in_tensors, | |||
| const std::vector<tensor::Tensor *> &out_tensors, const lite::Primitive *primitive); | |||