@@ -97,6 +97,13 @@ class MS_API LiteSession {
   ///
   /// \return The vector of MindSpore Lite MSTensor.
   virtual std::vector<tensor::MSTensor *> GetOutputsByName(const std::string &node_name) const = 0;
+
+  /// \brief Resize the shapes of the graph inputs.
+  ///
+  /// \param[in] inputs Define the tensors carrying the new input shapes.
+  ///
+  /// \return STATUS as an error code of resizing inputs, STATUS is defined in errorcode.h.
+  virtual int Resize(const std::vector<tensor::MSTensor *> &inputs) = 0;
 };
 }  // namespace session
 }  // namespace mindspore
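Note: a minimal caller-side sketch of the new API. The accessor names (GetInputs, set_shape on MSTensor) and the concrete shape are illustrative assumptions, not part of this patch:

    // Hypothetical usage: ask the session to re-plan for a new input shape.
    auto *session = mindspore::session::LiteSession::CreateSession(&context);
    auto inputs = session->GetInputs();       // assumed existing accessor
    inputs[0]->set_shape({2, 224, 224, 3});   // illustrative NHWC shape
    if (session->Resize(inputs) != mindspore::lite::RET_OK) {
      // on failure the session keeps (restores) its previous shapes
    }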
@@ -57,22 +57,27 @@ struct KernelKey {
 class LiteKernel {
  public:
   LiteKernel() = default;
-  explicit LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &in_tensors,
-                      const std::vector<lite::tensor::Tensor *> &out_tensors, const lite::Context *ctx,
-                      const lite::Primitive *primitive)
+  LiteKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &in_tensors,
+             const std::vector<lite::tensor::Tensor *> &out_tensors, const lite::Context *ctx,
+             const lite::Primitive *primitive)
       : op_parameter_(parameter),
         in_tensors_(in_tensors),
         out_tensors_(out_tensors),
         primitive_(primitive),
         context_(ctx) {
-    if (op_parameter_ && ctx) {
+    if (op_parameter_ != nullptr && ctx != nullptr) {
       op_parameter_->thread_num_ = ctx->thread_num_;
     }
     this->in_kernels_.clear();
     this->out_kernels_.clear();
   }

-  virtual ~LiteKernel() { delete op_parameter_; }
+  virtual ~LiteKernel() {
+    if (op_parameter_ != nullptr) {
+      delete op_parameter_;
+      op_parameter_ = nullptr;
+    }
+  }

   virtual int Prepare() {
     if (!InferShapeDone()) {
@@ -149,6 +154,8 @@ class LiteKernel {
   void set_need_reinit() { need_reinit_ = true; }

+  const lite::Primitive *GetPrimitive() const { return primitive_; }
+
  protected:
   bool InferShapeDone() { return !(primitive_ != nullptr && !primitive_->GetInferFlag()) && true; }
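Note: every kernel below migrates to the same contract, sketched here once so the per-kernel hunks read faster (SomeConvKernel is a placeholder, not a class in this patch): Init() only gates on InferShapeDone() and defers to ReSize(); ReSize() first frees shape-dependent buffers, then rebuilds them.

    int SomeConvKernel::Init() {
      if (!InferShapeDone()) {
        return RET_OK;  // shapes unknown yet; ReSize() runs later via Resize()
      }
      return ReSize();
    }

    int SomeConvKernel::ReSize() {
      FreeTmpBuffer();  // release buffers sized for the previous shape
      auto ret = ConvolutionBaseCPUKernel::Init();  // re-read shapes/params
      if (ret != RET_OK) {
        return ret;
      }
      return InitTmpBuffer();  // reallocate for the new shape
    }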
@@ -315,6 +315,41 @@ std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByName(const s
   }
   return ret->second;
 }
+
+int LiteSession::ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs) {
+  if (inputs.size() != inputs_.size()) {
+    MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
+    return RET_PARAM_INVALID;
+  }
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (inputs[i] == nullptr) {
+      MS_LOG(ERROR) << "Input tensor is nullptr!";
+      return RET_PARAM_INVALID;
+    }
+    inputs_[i]->set_shape(inputs[i]->shape());
+  }
+  return RET_OK;
+}
+
+int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs) {
+  inputs_old_.clear();
+  inputs_old_ = inputs_;
+  auto ret = ResizeInputs(inputs);
+  if (ret != RET_OK) {
+    inputs_ = inputs_old_;
+    return ret;
+  }
+  Scheduler scheduler(context_);
+  ret = scheduler.ReSizeKernels(kernels_);
+  if (ret != RET_OK) {
+    inputs_ = inputs_old_;
+    scheduler.ReSizeKernels(kernels_);
+    return ret;
+  }
+  return RET_OK;
+}
 }  // namespace lite

 session::LiteSession *session::LiteSession::CreateSession(lite::Context *context) {
@@ -327,4 +362,5 @@ session::LiteSession *session::LiteSession::CreateSession(lite::Context *context
   }
   return session;
 }
+
 }  // namespace mindspore
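Note the rollback in Resize() above: if ReSizeKernels fails after the input shapes were already overwritten, the old shapes are restored from inputs_old_ and ReSizeKernels is run once more, so the kernels are rebuilt against the restored shapes before the error is returned.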
@@ -54,6 +54,8 @@ class LiteSession : public session::LiteSession {
   std::vector<mindspore::tensor::MSTensor *> GetOutputsByName(const std::string &name) const override;

+  int Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs) override;
+
  protected:
   int ConvertTensors(const lite::Model *model);
@@ -68,6 +70,8 @@ class LiteSession : public session::LiteSession {
   void InitGraphInputMap(const lite::Model *model);
   // init this->output_map_
   void InitGraphOutputMap(const lite::Model *model);
+  // resize inputs
+  int ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs);

  protected:
   Context *context_ = nullptr;
@@ -75,6 +79,7 @@ class LiteSession : public session::LiteSession {
   std::vector<tensor::Tensor *> tensors_;
   // graph input tensors
   std::vector<tensor::Tensor *> inputs_;
+  std::vector<tensor::Tensor *> inputs_old_;
   // graph output tensors
   std::vector<tensor::Tensor *> outputs_;
   // graph input MSTensors
@@ -56,14 +56,14 @@ void PoolingBaseCPUKernel::FreeQuantParam() {
 }

 int PoolingBaseCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
-    return RET_OK;
-  }
   MS_ASSERT(in_tensors_.size() == 1);
   MS_ASSERT(out_tensors_.size() == 1);
   pooling_param_->thread_num_ = thread_count_;
   MS_ASSERT(this->op_parameter_ != nullptr);
+  return RET_OK;
+}
+
+int PoolingBaseCPUKernel::ReSize() {
   auto in_tensor = this->in_tensors_.front();
   auto out_tensor = this->out_tensors_.front();
   MS_ASSERT(in_tensor != nullptr);
@@ -37,7 +37,7 @@ class PoolingBaseCPUKernel : public LiteKernel {
   ~PoolingBaseCPUKernel() = default;

   int Init() override;
-  int ReSize() override { return RET_OK; }
+  int ReSize() override;
   int Run() override { return RET_OK; }
   int SetQuantParam();
   void FreeQuantParam();
@@ -98,38 +98,21 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() {
 }

 int Convolution1x1FP16CPUKernel::Init() {
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return ret;
-  }
-  ret = InitMatmulParam();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init matmul param failed.";
-    return ret;
-  }
-  ret = InitConv1x1Param();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init conv1x1 param failed.";
-    return ret;
-  }
-  ret = InitWeightBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
-    return ret;
+  if (!InferShapeDone()) {
+    return RET_OK;
   }
-  return RET_OK;
+  return ReSize();
 }

 int Convolution1x1FP16CPUKernel::ReSize() {
+  FreeTmpBuffer();
   if (fp16_weight_ != nullptr) {
     free(fp16_weight_);
+    fp16_weight_ = nullptr;
   }
   if (input_ptr_ != nullptr) {
     free(input_ptr_);
-  }
-  if (weight_ptr_ != nullptr) {
-    free(weight_ptr_);
+    input_ptr_ = nullptr;
   }
   auto ret = ConvolutionBaseCPUKernel::Init();
@@ -147,6 +130,11 @@ int Convolution1x1FP16CPUKernel::ReSize() {
     MS_LOG(ERROR) << "Init conv1x1 param failed.";
     return ret;
   }
+  ret = InitWeightBias();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init weight bias failed.";
+    return ret;
+  }
   return RET_OK;
 }
@@ -34,26 +34,32 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
       : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {
     matmul_param_ = new MatMulParameter();
   }
-  ~Convolution1x1FP16CPUKernel() override {
-    if (weight_ptr_ != nullptr) {
-      free(weight_ptr_);
-    }
-    if (pack_input_ != nullptr) {
-      free(pack_input_);
-    }
-    delete matmul_param_;
-  }
+  ~Convolution1x1FP16CPUKernel() override { FreeTmpBuffer(); }

   int Init() override;
   int ReSize() override;
   int Run() override;
   int RunImpl(int task_id);
-  int InitBuffer();
   int InitConv1x1Param();
   int InitMatmulParam();
   int InitWeightBias();
   void Pre1x1Trans(float16_t *src_input, float16_t *src_output);

  private:
+  void FreeTmpBuffer() {
+    if (weight_ptr_ != nullptr) {
+      free(weight_ptr_);
+      weight_ptr_ = nullptr;
+    }
+    if (matmul_param_ != nullptr) {
+      delete matmul_param_;
+      matmul_param_ = nullptr;
+    }
+    if (pack_input_ != nullptr) {
+      free(pack_input_);
+    }
+  }
   bool pre_trans_input_ = false;
   int thread_count_ = 0;
   int thread_stride_ = 0;
@@ -157,52 +157,39 @@ void Convolution3x3FP16CPUKernel::ConfigInputOutput() {
 }

 int Convolution3x3FP16CPUKernel::Init() {
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return ret;
-  }
-  ret = InitWeightBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
-    return RET_ERROR;
-  }
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
+  if (!InferShapeDone()) {
+    return RET_OK;
   }
-  ConfigInputOutput();
-  return RET_OK;
+  return ReSize();
 }

 int Convolution3x3FP16CPUKernel::ReSize() {
+  FreeTmpBuffer();
   if (tile_buffer_ != nullptr) {
     free(tile_buffer_);
-  }
-  if (block_unit_buffer_ != nullptr) {
-    free(block_unit_buffer_);
-  }
-  if (tmp_dst_buffer_ != nullptr) {
-    free(tmp_dst_buffer_);
-  }
-  if (tmp_out_ != nullptr) {
-    free(tmp_out_);
+    tile_buffer_ = nullptr;
   }
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
+    nhwc4_input_ = nullptr;
   }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
     return ret;
   }
+  ret = InitWeightBias();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init weight bias failed.";
+    return RET_ERROR;
+  }
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
     return RET_ERROR;
   }
+  ConfigInputOutput();
   return RET_OK;
 }
@@ -30,41 +30,49 @@ class Convolution3x3FP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
                             const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                             const lite::Primitive *primitive)
       : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~Convolution3x3FP16CPUKernel() override {
+  ~Convolution3x3FP16CPUKernel() override { FreeTmpBuffer(); }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+  int InitWeightBias();
+  int InitTmpBuffer();
+  void ConfigInputOutput();
+
+ private:
+  void FreeTmpBuffer() {
     if (fp16_weight_ != nullptr) {
       free(fp16_weight_);
+      fp16_weight_ = nullptr;
     }
     if (transformed_filter_addr_ != nullptr) {
       free(transformed_filter_addr_);
+      transformed_filter_addr_ = nullptr;
     }
     if (tile_buffer_ != nullptr) {
       free(tile_buffer_);
+      tile_buffer_ = nullptr;
     }
     if (block_unit_buffer_ != nullptr) {
       free(block_unit_buffer_);
+      block_unit_buffer_ = nullptr;
     }
     if (tmp_dst_buffer_ != nullptr) {
       free(tmp_dst_buffer_);
+      tmp_dst_buffer_ = nullptr;
     }
     if (tmp_out_ != nullptr) {
       free(tmp_out_);
+      tmp_out_ = nullptr;
     }
   }
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-  int RunImpl(int task_id);
-  int InitWeightBias();
-  int InitTmpBuffer();
-  void ConfigInputOutput();
-
- private:
-  float16_t *transformed_filter_addr_;
-  float16_t *tile_buffer_;
-  float16_t *block_unit_buffer_;
-  float16_t *tmp_dst_buffer_;
-  float16_t *tmp_out_;
+  float16_t *transformed_filter_addr_ = nullptr;
+  float16_t *tile_buffer_ = nullptr;
+  float16_t *block_unit_buffer_ = nullptr;
+  float16_t *tmp_dst_buffer_ = nullptr;
+  float16_t *tmp_out_ = nullptr;
 };

 void ProcessFilterFp16(float16_t *origin_weight, float16_t *dst_weight, ConvParameter *conv_param);
 }  // namespace mindspore::kernel
@@ -43,9 +43,9 @@ class ConvolutionBaseFP16CPUKernel : public ConvolutionBaseCPUKernel {
  protected:
   float16_t *fp16_weight_ = nullptr;
-  float16_t *execute_input_;  // ctx allocator malloc and free
-  float16_t *execute_weight_;
-  float16_t *execute_output_;  // ctx allocator malloc and free
+  float16_t *execute_input_ = nullptr;
+  float16_t *execute_weight_ = nullptr;
+  float16_t *execute_output_ = nullptr;
   TypeId in_data_type_;
   TypeId out_data_type_;
 };
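Note: the bulk of the remaining churn is one idiom applied uniformly. A standalone sketch (BufferOwner is illustrative, not patch code): owning pointers get in-class = nullptr initializers, and every release nulls the pointer, so FreeTmpBuffer() stays idempotent and safe to call from both ReSize() and the destructor.

    #include <cstdlib>

    struct BufferOwner {
      float *buf_ = nullptr;  // initialized at declaration, never dangling

      void FreeTmpBuffer() {
        if (buf_ != nullptr) {
          free(buf_);
          buf_ = nullptr;  // a second call becomes a harmless no-op
        }
      }
      ~BufferOwner() { FreeTmpBuffer(); }
    };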
@@ -29,8 +29,14 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_DepthwiseConv2D;

 namespace mindspore::kernel {
-ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() {
-  delete sliding_;
+ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { FreeTmpBuffer(); }
+
+void ConvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() {
+  if (sliding_ != nullptr) {
+    delete sliding_;
+    sliding_ = nullptr;
+  }
   if (packed_weight_ != nullptr) {
     delete packed_weight_;
     packed_weight_ = nullptr;
@@ -102,6 +108,14 @@ int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
 }

 int ConvolutionDepthwiseFp16CPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int ConvolutionDepthwiseFp16CPUKernel::ReSize() {
+  FreeTmpBuffer();
   // conv base init
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
@@ -125,27 +139,6 @@ int ConvolutionDepthwiseFp16CPUKernel::Init() {
   return RET_OK;
 }

-int ConvolutionDepthwiseFp16CPUKernel::ReSize() {
-  if (packed_input_ != nullptr) {
-    delete packed_input_;
-    packed_input_ = nullptr;
-  }
-  if (packed_output_ != nullptr) {
-    delete packed_output_;
-    packed_output_ = nullptr;
-  }
-  ConvolutionBaseCPUKernel::Init();
-  InitSlidingParamConvDw(sliding_, conv_param_, C8NUM);
-  auto ret = InitBuffer();
-  if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
 int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
   ConvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
                sliding_, task_id);
@@ -51,10 +51,11 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   int Execute(int task_id);

  private:
-  SlidingWindowParam *sliding_;
-  float16_t *packed_weight_;
-  float16_t *packed_input_;
-  float16_t *packed_output_;
+  void FreeTmpBuffer();
+  SlidingWindowParam *sliding_ = nullptr;
+  float16_t *packed_weight_ = nullptr;
+  float16_t *packed_input_ = nullptr;
+  float16_t *packed_output_ = nullptr;
 };
 }  // namespace mindspore::kernel
@@ -135,46 +135,36 @@ void ConvolutionFP16CPUKernel::ConfigInputOutput() {
 }

 int ConvolutionFP16CPUKernel::Init() {
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
-    return ret;
-  }
-  ret = InitWeightBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
-    return RET_ERROR;
-  }
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
+  if (!InferShapeDone()) {
+    return RET_OK;
   }
-  ConfigInputOutput();
-  return RET_OK;
+  return ReSize();
 }

 int ConvolutionFP16CPUKernel::ReSize() {
-  if (packed_input_ != nullptr) {
-    free(packed_input_);
-  }
-  if (tmp_output_block_ != nullptr) {
-    free(tmp_output_block_);
-  }
+  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
+    nhwc4_input_ = nullptr;
   }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
+    MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
     return ret;
   }
+  ret = InitWeightBias();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init weight bias failed.";
+    return RET_ERROR;
+  }
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
     return RET_ERROR;
   }
+  ConfigInputOutput();
   return RET_OK;
 }
@@ -29,33 +29,39 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
                           const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                           const lite::Primitive *primitive)
       : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionFP16CPUKernel() override {
+  ~ConvolutionFP16CPUKernel() override { FreeTmpBuffer(); }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+  int InitWeightBias();
+  int InitTmpBuffer();
+  void ConfigInputOutput();
+
+ private:
+  void FreeTmpBuffer() {
     if (fp16_weight_ != nullptr) {
       free(fp16_weight_);
+      fp16_weight_ = nullptr;
     }
     if (packed_input_ != nullptr) {
       free(packed_input_);
+      packed_input_ = nullptr;
     }
     if (packed_weight_ != nullptr) {
       free(packed_weight_);
+      packed_weight_ = nullptr;
     }
     if (tmp_output_block_ != nullptr) {
       free(tmp_output_block_);
+      tmp_output_block_ = nullptr;
     }
   }
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-  int RunImpl(int task_id);
-  int InitWeightBias();
-  int InitTmpBuffer();
-  void ConfigInputOutput();
-
- private:
-  float16_t *packed_input_;
-  float16_t *packed_weight_;
-  float16_t *tmp_output_block_;
+  float16_t *packed_input_ = nullptr;
+  float16_t *packed_weight_ = nullptr;
+  float16_t *tmp_output_block_ = nullptr;
 };
 }  // namespace mindspore::kernel
@@ -139,6 +139,19 @@ void ConvolutionSWFP16CPUKernel::ConfigInputOutput() {
 }

 int ConvolutionSWFP16CPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int ConvolutionSWFP16CPUKernel::ReSize() {
+  FreeTmpBuffer();
+  if (nhwc4_input_ != nullptr) {
+    free(nhwc4_input_);
+    nhwc4_input_ = nullptr;
+  }
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ConvolutionBase init fail!ret: " << ret;
@@ -162,31 +175,6 @@ int ConvolutionSWFP16CPUKernel::Init() {
   return RET_OK;
 }

-int ConvolutionSWFP16CPUKernel::ReSize() {
-  if (tmp_output_block_ != nullptr) {
-    free(tmp_output_block_);
-  }
-  if (nhwc4_input_ != nullptr) {
-    free(nhwc4_input_);
-  }
-  delete slidingWindow_param_;
-
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return ret;
-  }
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
-  }
-  // init sliding window param
-  slidingWindow_param_ = new SlidingWindowParam;
-  InitSlidingParamConv(slidingWindow_param_, conv_param_, C4NUM);
-  return RET_OK;
-}
-
 int ConvolutionSWFP16CPUKernel::RunImpl(int task_id) {
   ConvSWFp16(reinterpret_cast<float16_t *>(nhwc4_input_), packed_weight_, reinterpret_cast<float16_t *>(bias_data_),
              tmp_output_block_, execute_output_, task_id, conv_param_, slidingWindow_param_);
@@ -28,18 +28,7 @@ class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
                            const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                            const lite::Primitive *primitive)
       : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionSWFP16CPUKernel() override {
-    if (fp16_weight_ != nullptr) {
-      free(fp16_weight_);
-    }
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-    }
-    if (tmp_output_block_ != nullptr) {
-      free(tmp_output_block_);
-    }
-    delete slidingWindow_param_;
-  }
+  ~ConvolutionSWFP16CPUKernel() override { FreeTmpBuffer(); }

   int Init() override;
   int ReSize() override;
@@ -51,9 +40,27 @@ class ConvolutionSWFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
   int ProcessFilter();

  private:
-  float16_t *packed_weight_;
-  float16_t *tmp_output_block_;
-  SlidingWindowParam *slidingWindow_param_;
+  void FreeTmpBuffer() {
+    if (fp16_weight_ != nullptr) {
+      free(fp16_weight_);
+      fp16_weight_ = nullptr;
+    }
+    if (packed_weight_ != nullptr) {
+      free(packed_weight_);
+      packed_weight_ = nullptr;
+    }
+    if (tmp_output_block_ != nullptr) {
+      free(tmp_output_block_);
+      tmp_output_block_ = nullptr;
+    }
+    if (slidingWindow_param_ != nullptr) {
+      delete slidingWindow_param_;
+      slidingWindow_param_ = nullptr;
+    }
+  }
+  float16_t *packed_weight_ = nullptr;
+  float16_t *tmp_output_block_ = nullptr;
+  SlidingWindowParam *slidingWindow_param_ = nullptr;
 };
 }  // namespace mindspore::kernel
@@ -265,50 +265,17 @@ int ConvolutionWinogradFP16CPUKernel::ConfigInputOutput() {
 }

 int ConvolutionWinogradFP16CPUKernel::Init() {
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
-  }
-  kernel_unit_ = conv_param_->kernel_h_;
-  input_unit_ = output_unit_ + kernel_unit_ - 1;
-  conv_param_->input_unit_ = input_unit_;
-  conv_param_->output_unit_ = output_unit_;
-  ret = InitWeightBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
-    return RET_ERROR;
-  }
-  // malloc tmp buffer
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
+  if (!InferShapeDone()) {
+    return RET_OK;
   }
-  ret = ConfigInputOutput();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConfigInputOutput failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
+  return ReSize();
 }

 int ConvolutionWinogradFP16CPUKernel::ReSize() {
-  if (tmp_data_ != nullptr) {
-    free(tmp_data_);
-  }
-  if (trans_input_ != nullptr) {
-    free(trans_input_);
-  }
-  if (gemm_out_ != nullptr) {
-    free(gemm_out_);
-  }
-  if (tmp_out_data_ != nullptr) {
-    free(tmp_out_data_);
-  }
+  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
+    nhwc4_input_ = nullptr;
   }

   auto ret = ConvolutionBaseCPUKernel::Init();
@@ -321,6 +288,12 @@ int ConvolutionWinogradFP16CPUKernel::ReSize() {
   conv_param_->input_unit_ = input_unit_;
   conv_param_->output_unit_ = output_unit_;
+  ret = InitWeightBias();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init weight bias failed.";
+    return RET_ERROR;
+  }
+  // malloc tmp buffer
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
@@ -33,43 +33,52 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseFP16CPUKernel {
                                    const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx,
                                    const lite::Primitive *primitive, int out_unit)
       : ConvolutionBaseFP16CPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(out_unit) {}
-  ~ConvolutionWinogradFP16CPUKernel() override {
+  ~ConvolutionWinogradFP16CPUKernel() override { FreeTmpBuffer(); }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+  int InitWeightBias();
+  int MallocFilterMatrix(int oc_block, int oc_block_num);
+  int InitTmpBuffer();
+  int ConfigInputOutput();
+
+ private:
+  void FreeTmpBuffer() {
     if (fp16_weight_ != nullptr) {
       free(fp16_weight_);
+      fp16_weight_ = nullptr;
     }
     if (tmp_data_ != nullptr) {
       free(tmp_data_);
+      tmp_data_ = nullptr;
     }
     if (trans_input_ != nullptr) {
       free(trans_input_);
+      trans_input_ = nullptr;
     }
     if (gemm_out_ != nullptr) {
       free(gemm_out_);
+      gemm_out_ = nullptr;
     }
     if (tmp_out_data_ != nullptr) {
       free(tmp_out_data_);
+      tmp_out_data_ = nullptr;
+    }
+    if (trans_weight_ != nullptr) {
+      delete trans_weight_;
+      trans_weight_ = nullptr;
     }
-    delete trans_weight_;
   }
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-  int RunImpl(int task_id);
-  int InitWeightBias();
-  int MallocFilterMatrix(int oc_block, int oc_block_num);
-  int InitTmpBuffer();
-  int ConfigInputOutput();
-
- private:
   int kernel_unit_;
   int input_unit_;
   int output_unit_;
-  float16_t *tmp_data_;
-  float16_t *trans_input_;
-  float16_t *gemm_out_;
-  float16_t *tmp_out_data_;
-  Matrix *trans_weight_;
+  float16_t *tmp_data_ = nullptr;
+  float16_t *trans_input_ = nullptr;
+  float16_t *gemm_out_ = nullptr;
+  float16_t *tmp_out_data_ = nullptr;
+  Matrix *trans_weight_ = nullptr;
   InputTransformUnitFp16Func input_trans_func_;
   OutputTransformUnitFp16Func output_trans_func_;
   TmpBufferAddressFp16 tmp_buffer_address_list_[4];
@@ -28,8 +28,14 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_DeDepthwiseConv2D;

 namespace mindspore::kernel {
-DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() {
-  delete sliding_;
+DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { FreeTmpBuffer(); }
+
+void DeconvolutionDepthwiseFp16CPUKernel::FreeTmpBuffer() {
+  if (sliding_ != nullptr) {
+    delete sliding_;
+    sliding_ = nullptr;
+  }
   if (packed_weight_ != nullptr) {
     delete packed_weight_;
     packed_weight_ = nullptr;
@@ -115,6 +121,15 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
 }

 int DeconvolutionDepthwiseFp16CPUKernel::Init() {
+  if (!InferShapeDone()) {
+    return RET_OK;
+  }
+  return ReSize();
+}
+
+int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
+  FreeTmpBuffer();
   sliding_ = new SlidingWindowParam;
   InitSlideParam();
   // conv base init
@@ -137,27 +152,6 @@ int DeconvolutionDepthwiseFp16CPUKernel::Init() {
   return RET_OK;
 }

-int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
-  if (packed_input_ != nullptr) {
-    delete packed_input_;
-    packed_input_ = nullptr;
-  }
-  if (packed_output_ != nullptr) {
-    delete packed_output_;
-    packed_output_ = nullptr;
-  }
-  InitSlideParam();
-  ConvolutionBaseCPUKernel::Init();
-  auto ret = InitBuffer();
-  if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitBuffer failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
 int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
   DeconvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
                  sliding_, task_id);
@@ -52,10 +52,11 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseFP16CPUKernel
   int Execute(int task_id);

  private:
-  SlidingWindowParam *sliding_;
-  float16_t *packed_weight_;
-  float16_t *packed_input_;
-  float16_t *packed_output_;
+  void FreeTmpBuffer();
+  SlidingWindowParam *sliding_ = nullptr;
+  float16_t *packed_weight_ = nullptr;
+  float16_t *packed_input_ = nullptr;
+  float16_t *packed_output_ = nullptr;
 };
 }  // namespace mindspore::kernel
@@ -32,7 +32,19 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() {
 int DeConvolutionFp16CPUKernel::ReSize() {
   FreeParam();
-  InitParam();
+
+  ConvolutionBaseCPUKernel::Init();
+
+  int error_code = InitParam();
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "deconv InitParam error!";
+    return error_code;
+  }
+
+  error_code = InitWeightBias();
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "deconv InitWeightBias error!";
+    return error_code;
+  }
   return RET_OK;
 }
@@ -141,24 +153,10 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) {
 }

 int DeConvolutionFp16CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-
-  ConvolutionBaseCPUKernel::Init();
-
-  int error_code = InitParam();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv InitParam error!";
-    return error_code;
-  }
-
-  error_code = InitWeightBias();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv InitWeightBias error!";
-    return error_code;
-  }
-  return RET_OK;
+  return ReSize();
 }

 int DeConvolutionFp16CPUKernel::Run() {
@@ -59,19 +59,23 @@ int PoolingFp16CPUKernel::Init() {
     return ret;
   }
-  ret = InitBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init Buffer failed.";
-    return ret;
+  if (!InferShapeDone()) {
+    return RET_OK;
   }
-  return RET_OK;
+  return ReSize();
 }

 int PoolingFp16CPUKernel::ReSize() {
-  auto ret = Init();
+  auto ret = PoolingBaseCPUKernel::ReSize();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Pooling resize init failed.";
-    return RET_ERROR;
+    MS_LOG(ERROR) << "PoolingBase ReSize fail!ret: " << ret;
+    return ret;
+  }
+  ret = InitBuffer();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init Buffer fail!ret: " << ret;
+    return ret;
   }
   return RET_OK;
 }
@@ -131,40 +131,17 @@ void ConvolutionCPUKernel::ConfigInputOutput() {
 }

 int ConvolutionCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
-  }
-  ret = InitWeightBias();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
-    return RET_ERROR;
-  }
-  // init tmp input, output
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
-  }
-  // config input output
-  ConfigInputOutput();
-  return RET_OK;
+  return ReSize();
 }

 int ConvolutionCPUKernel::ReSize() {
-  if (packed_input_ != nullptr) {
-    free(packed_input_);
-  }
-  if (tmp_output_block_ != nullptr) {
-    free(tmp_output_block_);
-  }
+  FreeTmpBuffer();
   if (nhwc4_input_ != nullptr) {
     free(nhwc4_input_);
+    nhwc4_input_ = nullptr;
   }

   auto ret = ConvolutionBaseCPUKernel::Init();
@@ -172,12 +149,19 @@ int ConvolutionCPUKernel::ReSize() {
     MS_LOG(ERROR) << "ConvolutionBase init failed.";
     return RET_ERROR;
   }
+  ret = InitWeightBias();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Init weight bias failed.";
+    return RET_ERROR;
+  }
   // init tmp input, output
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init tmp buffer failed.";
     return RET_ERROR;
   }
+  // config input output
+  ConfigInputOutput();
   return RET_OK;
 }
@@ -30,17 +30,7 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
                        const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                        const lite::Primitive *primitive)
       : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~ConvolutionCPUKernel() override {
-    if (packed_input_ != nullptr) {
-      free(packed_input_);
-    }
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-    }
-    if (tmp_output_block_ != nullptr) {
-      free(tmp_output_block_);
-    }
-  };
+  ~ConvolutionCPUKernel() override { FreeTmpBuffer(); }

   int Init() override;
   int ReSize() override;
@@ -51,9 +41,23 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   void ConfigInputOutput();

  private:
-  float *packed_input_;
-  float *packed_weight_;
-  float *tmp_output_block_;
+  void FreeTmpBuffer() {
+    if (packed_input_ != nullptr) {
+      free(packed_input_);
+      packed_input_ = nullptr;
+    }
+    if (tmp_output_block_ != nullptr) {
+      free(tmp_output_block_);
+      tmp_output_block_ = nullptr;
+    }
+    if (packed_weight_ != nullptr) {
+      free(packed_weight_);
+      packed_weight_ = nullptr;
+    }
+  }
+  float *packed_input_ = nullptr;
+  float *packed_weight_ = nullptr;
+  float *tmp_output_block_ = nullptr;
   GEMM_FUNC_FP32 gemm_func_ = nullptr;
 };
 }  // namespace mindspore::kernel
@@ -23,6 +23,13 @@ using mindspore::lite::RET_OK;

 namespace mindspore::kernel {
 Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
+  FreeTmpBuffer();
+  if (matmul_param_ != nullptr) {
+    delete matmul_param_;
+  }
+}
+
+void Convolution1x1CPUKernel::FreeTmpBuffer() {
   if (weight_ptr_ != nullptr) {
     free(weight_ptr_);
     weight_ptr_ = nullptr;
@@ -35,20 +42,23 @@ Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
     free(input_ptr_);
     input_ptr_ = nullptr;
   }
-  delete matmul_param_;
 }

 int Convolution1x1CPUKernel::ReSize() {
-  if (pack_input_ != nullptr) {
-    free(pack_input_);
-    pack_input_ = nullptr;
+  FreeTmpBuffer();
+  ConvolutionBaseCPUKernel::Init();
+  InitConv1x1MatmulParam();
+  int error_code = InitConv1x1BiasWeight();
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Convolution base init failed.";
+    return error_code;
   }
-  if (pre_trans_input_ && input_ptr_ != nullptr) {
-    free(input_ptr_);
-    input_ptr_ = nullptr;
+  error_code = InitConv1x1Param();
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "Convolution base init failed.";
+    return error_code;
   }
-  InitConv1x1MatmulParam();
-  InitConv1x1Param();
   return RET_OK;
 }
@@ -125,24 +135,10 @@ void Convolution1x1CPUKernel::Pre1x1Trans(float *src_input, float *src_output) {
 }

 int Convolution1x1CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
-  ConvolutionBaseCPUKernel::Init();
-  InitConv1x1MatmulParam();
-  int error_code = InitConv1x1BiasWeight();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "Convolution base init failed.";
-    return error_code;
-  }
-  error_code = InitConv1x1Param();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "Convolution base init failed.";
-    return error_code;
-  }
-  return RET_OK;
+  return ReSize();
 }

 int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
@@ -52,6 +52,7 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
   int InitConv1x1BiasWeight();
   void InitConv1x1MatmulParam();
   void Pre1x1Trans(float *src_input, float *src_output);
+  void FreeTmpBuffer();

  private:
   MatMulParameter *matmul_param_ = nullptr;
@@ -159,59 +159,34 @@ void Convolution3x3CPUKernel::ConfigInputOutput() {
 }

 int Convolution3x3CPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int Convolution3x3CPUKernel::ReSize() {
+  FreeTmpBuffer();
   auto ret = ConvolutionBaseCPUKernel::Init();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
+    MS_LOG(ERROR) << "ConvolutionBase init failed.ret: " << ret;
     return RET_ERROR;
   }
   ret = InitWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init weight bias failed.";
+    MS_LOG(ERROR) << "Init weight bias failed.ret: " << ret;
     return RET_ERROR;
   }
   ret = InitTmpBuffer();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
+    MS_LOG(ERROR) << "Init tmp buffer failed.ret: " << ret;
     return RET_ERROR;
   }
   ConfigInputOutput();
   return RET_OK;
 }

-int Convolution3x3CPUKernel::ReSize() {
-  if (tile_buffer_ != nullptr) {
-    free(tile_buffer_);
-  }
-  if (block_unit_buffer_ != nullptr) {
-    free(block_unit_buffer_);
-  }
-  if (tmp_dst_buffer_ != nullptr) {
-    free(tmp_dst_buffer_);
-  }
-  if (nhwc4_input_ != nullptr) {
-    free(nhwc4_input_);
-  }
-  if (nc4hw4_out_ != nullptr) {
-    free(nc4hw4_out_);
-  }
-  auto ret = ConvolutionBaseCPUKernel::Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConvolutionBase init failed.";
-    return RET_ERROR;
-  }
-  ret = InitTmpBuffer();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Init tmp buffer failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
 int Convolution3x3CPUKernel::RunImpl(int task_id) {
   if (gemm_func_ == nullptr) {
     MS_LOG(ERROR) << "gemm_func is nullptr.";
@@ -29,38 +29,45 @@ class Convolution3x3CPUKernel : public ConvolutionBaseCPUKernel {
                           const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx,
                           const lite::Primitive *primitive)
       : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {}
-  ~Convolution3x3CPUKernel() override {
-    if (transformed_filter_addr_ != nullptr) {
-      free(transformed_filter_addr_);
-    }
+  ~Convolution3x3CPUKernel() override { FreeTmpBuffer(); }
+
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+  int RunImpl(int task_id);
+  int InitWeightBias();
+  int InitTmpBuffer();
+  void ConfigInputOutput();
+
+ private:
+  void FreeTmpBuffer() {
     if (tile_buffer_ != nullptr) {
       free(tile_buffer_);
+      tile_buffer_ = nullptr;
     }
     if (block_unit_buffer_ != nullptr) {
      free(block_unit_buffer_);
+      block_unit_buffer_ = nullptr;
     }
     if (tmp_dst_buffer_ != nullptr) {
       free(tmp_dst_buffer_);
+      tmp_dst_buffer_ = nullptr;
+    }
+    if (nhwc4_input_ != nullptr) {
+      free(nhwc4_input_);
+      nhwc4_input_ = nullptr;
     }
     if (nc4hw4_out_ != nullptr) {
       free(nc4hw4_out_);
+      nc4hw4_out_ = nullptr;
     }
-  };
-
-  int Init() override;
-  int ReSize() override;
-  int Run() override;
-  int RunImpl(int task_id);
-  int InitWeightBias();
-  int InitTmpBuffer();
-  void ConfigInputOutput();
+  }

- private:
-  float *transformed_filter_addr_;
-  float *tile_buffer_;
-  float *block_unit_buffer_;
-  float *tmp_dst_buffer_;
-  float *nc4hw4_out_;
+  float *transformed_filter_addr_ = nullptr;
+  float *tile_buffer_ = nullptr;
+  float *block_unit_buffer_ = nullptr;
+  float *tmp_dst_buffer_ = nullptr;
+  float *nc4hw4_out_ = nullptr;
   TmpBufferAddress tmp_buffer_address_list_[4];
   GEMM_FUNC_FP32 gemm_func_ = nullptr;
 };
@@ -29,8 +29,14 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_DepthwiseConv2D;

 namespace mindspore::kernel {
-ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() {
-  delete sliding_;
+ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { FreeTmpBuffer(); }
+
+void ConvolutionDepthwiseCPUKernel::FreeTmpBuffer() {
+  if (sliding_ != nullptr) {
+    delete sliding_;
+    sliding_ = nullptr;
+  }
   if (packed_weight_ != nullptr) {
     delete packed_weight_;
     packed_weight_ = nullptr;
@@ -105,10 +111,14 @@ int ConvolutionDepthwiseCPUKernel::InitBuffer() {
 }

 int ConvolutionDepthwiseCPUKernel::Init() {
-  if (context_->infer_shape_interrupt_ && !context_->running_) {
-    set_need_reinit();
+  if (!InferShapeDone()) {
     return RET_OK;
   }
+  return ReSize();
+}
+
+int ConvolutionDepthwiseCPUKernel::ReSize() {
+  FreeTmpBuffer();
   // conv base init
   ConvolutionBaseCPUKernel::Init();
@@ -130,33 +140,6 @@ int ConvolutionDepthwiseCPUKernel::Init() {
   return RET_OK;
 }

-int ConvolutionDepthwiseCPUKernel::ReSize() {
-  if (need_align_) {
-    if (packed_input_ != nullptr) {
-      delete packed_input_;
-      packed_input_ = nullptr;
-    }
-    if (packed_output_ != nullptr) {
-      delete packed_output_;
-      packed_output_ = nullptr;
-    }
-  }
-  // conv base init
-  ConvolutionBaseCPUKernel::Init();
-
-  // init sliding window param
-  sliding_ = new SlidingWindowParam;
-  InitSlidingParamConvDw(sliding_, conv_param_, C4NUM);
-
-  auto ret = InitBuffer();
-  if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitBuffer failed.";
-    return RET_ERROR;
-  }
-  return RET_OK;
-}
-
 int ConvolutionDepthwiseCPUKernel::Execute(int task_id) {
   ConvDwC4Fp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
                sliding_, task_id);
| @@ -40,6 +40,7 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | SlidingWindowParam *sliding_ = nullptr; | ||||
| float *packed_weight_ = nullptr; | float *packed_weight_ = nullptr; | ||||
| float *packed_input_ = nullptr; | float *packed_input_ = nullptr; | ||||
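One detail worth flagging in the depthwise hunk above: FreeTmpBuffer() releases packed_weight_ with `delete`, while the 3x3 variant below uses `free()` for a member of the same name. Which form is correct depends on how each buffer was allocated, which is outside this hunk; the pairs simply must match, as this sketch illustrates:

```cpp
#include <cstdlib>

// Allocation and release must come in matching pairs; mixing malloc/free
// with new/delete is undefined behaviour. Names are illustrative.
int main() {
  float *malloc_buf = static_cast<float *>(malloc(16 * sizeof(float)));
  free(malloc_buf);  // malloc -> free

  float *single = new float(0.0f);
  delete single;     // new    -> delete

  float *array_buf = new float[16]();
  delete[] array_buf;  // new[] -> delete[] (never free() or plain delete)
  return 0;
}
```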
| @@ -100,50 +100,56 @@ int ConvolutionDepthwise3x3CPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int ConvolutionDepthwise3x3CPUKernel::Init() { | int ConvolutionDepthwise3x3CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| return RET_OK; | |||||
| } | |||||
| // conv base init | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initWeightBias error!"; | |||||
| return ret; | |||||
| } | |||||
| // init threadNum; | |||||
| conv_param_->thread_num_ = MSMIN(thread_count_, UP_DIV(conv_param_->output_channel_, C4NUM)); | |||||
| ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!"; | |||||
| return ret; | |||||
| } | |||||
| // malloc one block buffer | // malloc one block buffer | ||||
| block_buffer_ = reinterpret_cast<float *>(malloc(thread_count_ * 16 * C4NUM * sizeof(float))); | block_buffer_ = reinterpret_cast<float *>(malloc(thread_count_ * 16 * C4NUM * sizeof(float))); | ||||
| if (block_buffer_ == nullptr) { | if (block_buffer_ == nullptr) { | ||||
| MS_LOG(ERROR) << "malloc block buffer failed."; | MS_LOG(ERROR) << "malloc block buffer failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | } | ||||
| int ConvolutionDepthwise3x3CPUKernel::ReSize() { | |||||
| void ConvolutionDepthwise3x3CPUKernel::FreeTmpBuffer() { | |||||
| if (need_align_) { | if (need_align_) { | ||||
| free(packed_input_); | |||||
| free(packed_output_); | |||||
| if (packed_input_ != nullptr) { | |||||
| free(packed_input_); | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| free(packed_output_); | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | } | ||||
| free(trans_buffer_); | |||||
| if (trans_buffer_ != nullptr) { | |||||
| free(trans_buffer_); | |||||
| trans_buffer_ = nullptr; | |||||
| } | |||||
| if (packed_weight_ != nullptr) { | |||||
| free(packed_weight_); | |||||
| packed_weight_ = nullptr; | |||||
| } | |||||
| } | |||||
| int ConvolutionDepthwise3x3CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| auto ret = InitBuffer(); | |||||
| auto ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initWeightBias error!ret: " << ret; | |||||
| return ret; | |||||
| } | |||||
| // init threadNum; | |||||
| conv_param_->thread_num_ = MSMIN(thread_count_, UP_DIV(conv_param_->output_channel_, C4NUM)); | |||||
| ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!"; | |||||
| MS_LOG(ERROR) << "Depthwise3x3 fp32 initBuffer error!ret: " << ret; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -31,13 +31,11 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~ConvolutionDepthwise3x3CPUKernel() override { | ~ConvolutionDepthwise3x3CPUKernel() override { | ||||
| free(packed_weight_); | |||||
| if (need_align_) { | |||||
| free(packed_input_); | |||||
| free(packed_output_); | |||||
| FreeTmpBuffer(); | |||||
| if (block_buffer_ != nullptr) { | |||||
| free(block_buffer_); | |||||
| block_buffer_ = nullptr; | |||||
| } | } | ||||
| free(block_buffer_); | |||||
| free(trans_buffer_); | |||||
| }; | }; | ||||
| int Init() override; | int Init() override; | ||||
| @@ -49,6 +47,7 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| float *packed_weight_ = nullptr; | float *packed_weight_ = nullptr; | ||||
| float *packed_input_ = nullptr; | float *packed_input_ = nullptr; | ||||
| float *packed_output_ = nullptr; | float *packed_output_ = nullptr; | ||||
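The structural change repeated across every kernel in this patch is the same: Init() no longer performs shape-dependent setup itself. It returns early while shapes are unknown (InferShapeDone() false) and otherwise delegates to ReSize(), which first frees anything sized for the previous shape. A condensed sketch of that control flow with a hypothetical kernel (RET_OK stands in for the constant from errorcode.h):

```cpp
// Hypothetical kernel condensing the Init()/ReSize() split applied here.
constexpr int RET_OK = 0;  // stand-in for the code from errorcode.h

class ResizableKernel {
 public:
  int Init() {
    if (!InferShapeDone()) {
      return RET_OK;  // shapes not inferred yet; ReSize() runs later via Prepare()
    }
    return ReSize();  // shapes known: do the full shape-dependent setup now
  }

  int ReSize() {
    FreeTmpBuffer();        // drop buffers sized for the previous shape
    int ret = InitWeightBias();
    if (ret != RET_OK) {
      return ret;           // propagate the concrete failure code
    }
    return InitBuffer();    // allocate scratch for the new shape
  }

 private:
  bool InferShapeDone() { return true; }
  void FreeTmpBuffer() {}
  int InitWeightBias() { return RET_OK; }
  int InitBuffer() { return RET_OK; }
};
```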
| @@ -107,10 +107,20 @@ void ConvolutionSWCPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionSWCPUKernel::Init() { | int ConvolutionSWCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int ConvolutionSWCPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| if (nhwc4_input_ != nullptr) { | |||||
| free(nhwc4_input_); | |||||
| nhwc4_input_ = nullptr; | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| @@ -136,32 +146,6 @@ int ConvolutionSWCPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int ConvolutionSWCPUKernel::ReSize() { | |||||
| if (tmp_output_block_ != nullptr) { | |||||
| free(tmp_output_block_); | |||||
| } | |||||
| if (nhwc4_input_ != nullptr) { | |||||
| free(nhwc4_input_); | |||||
| } | |||||
| delete slidingWindow_param_; | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // init tmp input, output | |||||
| ret = InitTmpBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // init sliding window param | |||||
| slidingWindow_param_ = new SlidingWindowParam; | |||||
| InitSlidingParamConv(slidingWindow_param_, conv_param_, C4NUM); | |||||
| return RET_OK; | |||||
| } | |||||
| int ConvolutionSWCPUKernel::RunImpl(int task_id) { | int ConvolutionSWCPUKernel::RunImpl(int task_id) { | ||||
| auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data()); | auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->Data()); | ||||
| ConvSWFp32(reinterpret_cast<float *>(nhwc4_input_), packed_weight_, reinterpret_cast<float *>(bias_data_), | ConvSWFp32(reinterpret_cast<float *>(nhwc4_input_), packed_weight_, reinterpret_cast<float *>(bias_data_), | ||||
| @@ -32,15 +32,7 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| const lite::Primitive *primitive) | const lite::Primitive *primitive) | ||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~ConvolutionSWCPUKernel() override { | |||||
| if (packed_weight_ != nullptr) { | |||||
| free(packed_weight_); | |||||
| } | |||||
| if (tmp_output_block_ != nullptr) { | |||||
| free(tmp_output_block_); | |||||
| } | |||||
| delete slidingWindow_param_; | |||||
| }; | |||||
| ~ConvolutionSWCPUKernel() override { FreeTmpBuffer(); } | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -51,9 +43,23 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| void ConfigInputOutput(); | void ConfigInputOutput(); | ||||
| private: | private: | ||||
| float *packed_weight_; | |||||
| float *tmp_output_block_; | |||||
| SlidingWindowParam *slidingWindow_param_; | |||||
| void FreeTmpBuffer() { | |||||
| if (packed_weight_ != nullptr) { | |||||
| free(packed_weight_); | |||||
| packed_weight_ = nullptr; | |||||
| } | |||||
| if (tmp_output_block_ != nullptr) { | |||||
| free(tmp_output_block_); | |||||
| tmp_output_block_ = nullptr; | |||||
| } | |||||
| if (slidingWindow_param_ != nullptr) { | |||||
| delete slidingWindow_param_; | |||||
| slidingWindow_param_ = nullptr; | |||||
| } | |||||
| } | |||||
| float *packed_weight_ = nullptr; | |||||
| float *tmp_output_block_ = nullptr; | |||||
| SlidingWindowParam *slidingWindow_param_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_SLIDEWINDOW_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CONVOLUTION_SLIDEWINDOW_H_ | ||||
| @@ -245,54 +245,17 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionWinogradCPUKernel::Init() { | int ConvolutionWinogradCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| kernel_unit_ = conv_param_->kernel_h_; | |||||
| input_unit_ = output_unit_ + kernel_unit_ - 1; | |||||
| conv_param_->input_unit_ = input_unit_; | |||||
| conv_param_->output_unit_ = output_unit_; | |||||
| ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // malloc tmp buffer | |||||
| ret = InitTmpBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ret = ConfigInputOutput(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ConfigInputOutput failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| return ReSize(); | |||||
| } | } | ||||
| int ConvolutionWinogradCPUKernel::ReSize() { | int ConvolutionWinogradCPUKernel::ReSize() { | ||||
| if (tmp_data_ != nullptr) { | |||||
| free(tmp_data_); | |||||
| } | |||||
| if (trans_input_ != nullptr) { | |||||
| free(trans_input_); | |||||
| } | |||||
| if (gemm_out_ != nullptr) { | |||||
| free(gemm_out_); | |||||
| } | |||||
| if (tmp_out_data_ != nullptr) { | |||||
| free(tmp_out_data_); | |||||
| } | |||||
| FreeTmpBuffer(); | |||||
| if (nhwc4_input_ != nullptr) { | if (nhwc4_input_ != nullptr) { | ||||
| free(nhwc4_input_); | free(nhwc4_input_); | ||||
| nhwc4_input_ = nullptr; | |||||
| } | } | ||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| @@ -305,6 +268,12 @@ int ConvolutionWinogradCPUKernel::ReSize() { | |||||
| conv_param_->input_unit_ = input_unit_; | conv_param_->input_unit_ = input_unit_; | ||||
| conv_param_->output_unit_ = output_unit_; | conv_param_->output_unit_ = output_unit_; | ||||
| ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // malloc tmp buffer | |||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | MS_LOG(ERROR) << "Init tmp buffer failed."; | ||||
| @@ -30,40 +30,51 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | ||||
| const lite::Primitive *primitive, int output_unit) | const lite::Primitive *primitive, int output_unit) | ||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit) {} | |||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive), output_unit_(output_unit), | |||||
| trans_weight_(nullptr) {} | |||||
| ~ConvolutionWinogradCPUKernel() override { | ~ConvolutionWinogradCPUKernel() override { | ||||
| FreeTmpBuffer(); | |||||
| }; | |||||
| int Init() override; | |||||
| int ReSize() override; | |||||
| int Run() override; | |||||
| int RunImpl(int task_id); | |||||
| int InitWeightBias(); | |||||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||||
| int InitTmpBuffer(); | |||||
| int ConfigInputOutput(); | |||||
| private: | |||||
| void FreeTmpBuffer() { | |||||
| if (tmp_data_ != nullptr) { | if (tmp_data_ != nullptr) { | ||||
| free(tmp_data_); | free(tmp_data_); | ||||
| tmp_data_ = nullptr; | |||||
| } | } | ||||
| if (trans_input_ != nullptr) { | if (trans_input_ != nullptr) { | ||||
| free(trans_input_); | free(trans_input_); | ||||
| trans_input_ = nullptr; | |||||
| } | } | ||||
| if (gemm_out_ != nullptr) { | if (gemm_out_ != nullptr) { | ||||
| free(gemm_out_); | free(gemm_out_); | ||||
| gemm_out_ = nullptr; | |||||
| } | } | ||||
| if (tmp_out_data_ != nullptr) { | if (tmp_out_data_ != nullptr) { | ||||
| free(tmp_out_data_); | free(tmp_out_data_); | ||||
| tmp_out_data_ = nullptr; | |||||
| } | } | ||||
| delete trans_weight_; | |||||
| }; | |||||
| int Init() override; | |||||
| int ReSize() override; | |||||
| int Run() override; | |||||
| int RunImpl(int task_id); | |||||
| int InitWeightBias(); | |||||
| int MallocFilterMatrix(int oc_block, int oc_block_num); | |||||
| int InitTmpBuffer(); | |||||
| int ConfigInputOutput(); | |||||
| private: | |||||
| if (trans_weight_ != nullptr) { | |||||
| delete trans_weight_; | |||||
| trans_weight_ = nullptr; | |||||
| } | |||||
| } | |||||
| int kernel_unit_; | int kernel_unit_; | ||||
| int input_unit_; | int input_unit_; | ||||
| int output_unit_; | int output_unit_; | ||||
| float *tmp_data_; | |||||
| float *trans_input_; | |||||
| float *gemm_out_; | |||||
| float *tmp_out_data_; | |||||
| Matrix *trans_weight_; | |||||
| float *tmp_data_ = nullptr; | |||||
| float *trans_input_ = nullptr; | |||||
| float *gemm_out_ = nullptr; | |||||
| float *tmp_out_data_ = nullptr; | |||||
| Matrix *trans_weight_ = nullptr; | |||||
| InputTransformUnitFunc input_trans_func_; | InputTransformUnitFunc input_trans_func_; | ||||
| OutputTransformUnitFunc output_trans_func_; | OutputTransformUnitFunc output_trans_func_; | ||||
| TmpBufferAddress tmp_buffer_address_list_[5]; | TmpBufferAddress tmp_buffer_address_list_[5]; | ||||
| @@ -25,7 +25,9 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DeConv2D; | using mindspore::schema::PrimitiveType_DeConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { | |||||
| DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { FreeTmpBuffer(); } | |||||
| void DeConvolutionCPUKernel::FreeTmpBuffer() { | |||||
| if (weight_ptr_ != nullptr) { | if (weight_ptr_ != nullptr) { | ||||
| free(weight_ptr_); | free(weight_ptr_); | ||||
| weight_ptr_ = nullptr; | weight_ptr_ = nullptr; | ||||
| @@ -42,24 +44,23 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { | |||||
| free(pack_output_); | free(pack_output_); | ||||
| pack_output_ = nullptr; | pack_output_ = nullptr; | ||||
| } | } | ||||
| return; | |||||
| } | } | ||||
| int DeConvolutionCPUKernel::ReSize() { | int DeConvolutionCPUKernel::ReSize() { | ||||
| if (tmp_buffer_ != nullptr) { | |||||
| free(tmp_buffer_); | |||||
| tmp_buffer_ = nullptr; | |||||
| } | |||||
| if (pack_input_ != nullptr) { | |||||
| free(pack_input_); | |||||
| pack_input_ = nullptr; | |||||
| } | |||||
| if (pack_output_ != nullptr) { | |||||
| free(pack_output_); | |||||
| pack_output_ = nullptr; | |||||
| FreeTmpBuffer(); | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| int error_code = InitParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv InitParam error!ret: " << error_code; | |||||
| return error_code; | |||||
| } | } | ||||
| InitParam(); | |||||
| error_code = InitWeightBias(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code; | |||||
| return error_code; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -151,24 +152,10 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) { | |||||
| } | } | ||||
| int DeConvolutionCPUKernel::Init() { | int DeConvolutionCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| int error_code = InitParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv InitParam error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitWeightBias(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv InitWeightBias error!"; | |||||
| return error_code; | |||||
| } | |||||
| return RET_OK; | |||||
| return ReSize(); | |||||
| } | } | ||||
| int DeConvolutionCPUKernel::Run() { | int DeConvolutionCPUKernel::Run() { | ||||
| @@ -47,20 +47,21 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| private: | private: | ||||
| int InitParam(); | int InitParam(); | ||||
| int InitWeightBias(); | int InitWeightBias(); | ||||
| void FreeTmpBuffer(); | |||||
| private: | private: | ||||
| MatMulParameter *matmul_param_; | |||||
| MatMulParameter *matmul_param_ = nullptr; | |||||
| int input_plane_; | int input_plane_; | ||||
| int kernel_plane_; | int kernel_plane_; | ||||
| int output_plane_; | int output_plane_; | ||||
| int thread_count_; | int thread_count_; | ||||
| int thread_stride_; | int thread_stride_; | ||||
| float *weight_ptr_; | |||||
| float *pack_input_; | |||||
| float *pack_output_; | |||||
| float *tmp_buffer_; | |||||
| float *input_ptr_; | |||||
| float *output_ptr_; | |||||
| float *weight_ptr_ = nullptr; | |||||
| float *pack_input_ = nullptr; | |||||
| float *pack_output_ = nullptr; | |||||
| float *tmp_buffer_ = nullptr; | |||||
| float *input_ptr_ = nullptr; | |||||
| float *output_ptr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ | #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ | ||||
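The deconvolution hunk above also tightens error handling: InitParam()'s result is now checked instead of ignored, and failures forward the concrete code rather than a blanket RET_ERROR. A toy sketch of the difference (the RET_* values are stand-ins for the real constants in errorcode.h, and InitParam() here is a hypothetical failing step):

```cpp
#include <iostream>

constexpr int RET_OK = 0;              // stand-ins for errorcode.h values
constexpr int RET_PARAM_INVALID = -2;

int InitParam() { return RET_PARAM_INVALID; }  // hypothetical failing step

int ReSize() {
  int error_code = InitParam();
  if (error_code != RET_OK) {
    // Log and forward the specific code so the caller can tell *why* it failed.
    std::cerr << "deconv InitParam error! ret: " << error_code << '\n';
    return error_code;
  }
  return RET_OK;
}

int main() { return ReSize() == RET_OK ? 0 : 1; }
```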
| @@ -27,8 +27,14 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { | |||||
| delete sliding_; | |||||
| DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { FreeTmpBuffer(); } | |||||
| void DeconvolutionDepthwiseCPUKernel::FreeTmpBuffer() { | |||||
| if (sliding_ != nullptr) { | |||||
| delete sliding_; | |||||
| sliding_ = nullptr; | |||||
| } | |||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| @@ -120,48 +126,28 @@ int DeconvolutionDepthwiseCPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseCPUKernel::Init() { | int DeconvolutionDepthwiseCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| InitSlideParam(); | InitSlideParam(); | ||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| auto ret = InitWeightBias(); | auto ret = InitWeightBias(); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed."; | |||||
| return RET_ERROR; | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.ret: " << ret; | |||||
| return ret; | |||||
| } | } | ||||
| ret = InitBuffer(); | ret = InitBuffer(); | ||||
| if (ret != 0) { | if (ret != 0) { | ||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int DeconvolutionDepthwiseCPUKernel::ReSize() { | |||||
| if (need_align_) { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| InitSlideParam(); | |||||
| // conv base init | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = InitBuffer(); | |||||
| if (ret != 0) { | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed."; | |||||
| return RET_ERROR; | |||||
| MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitBuffer failed.ret: " << ret; | |||||
| return ret; | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -41,10 +41,11 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| SlidingWindowParam *sliding_; | |||||
| float *packed_weight_; | |||||
| float *packed_input_; | |||||
| float *packed_output_; | |||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding_ = nullptr; | |||||
| float *packed_weight_ = nullptr; | |||||
| float *packed_input_ = nullptr; | |||||
| float *packed_output_ = nullptr; | |||||
| bool need_align_ = false; | bool need_align_ = false; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -28,13 +28,10 @@ using mindspore::schema::PrimitiveType_Flatten; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int FlattenCPUKernel::Init() { | int FlattenCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ReSize(); | |||||
| return RET_OK; | |||||
| return ReSize(); | |||||
| } | } | ||||
| int FlattenCPUKernel::ReSize() { | int FlattenCPUKernel::ReSize() { | ||||
| @@ -99,10 +99,14 @@ int LstmCPUKernel::InitWeightBias() { | |||||
| } | } | ||||
| int LstmCPUKernel::Init() { | int LstmCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int LstmCPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| auto ret = InitParam(); | auto ret = InitParam(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "LstmCPUKernel InitParam error."; | MS_LOG(ERROR) << "LstmCPUKernel InitParam error."; | ||||
| @@ -123,23 +127,6 @@ int LstmCPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int LstmCPUKernel::ReSize() { | |||||
| free(gate_buffer_); | |||||
| auto ret = InitParam(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "LstmCPUKernel InitParam error."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "LstmCPUKernel InitBuffer error."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int LstmCPUKernel::Run() { | int LstmCPUKernel::Run() { | ||||
| auto prepare_ret = Prepare(); | auto prepare_ret = Prepare(); | ||||
| if (prepare_ret != RET_OK) { | if (prepare_ret != RET_OK) { | ||||
| @@ -170,13 +157,16 @@ int LstmCPUKernel::Run() { | |||||
| } | } | ||||
| kernel::LiteKernel *CpuLstmKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuLstmKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *opParameter, | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | |||||
| const lite::Context *ctx, const kernel::KernelKey &desc, | const lite::Context *ctx, const kernel::KernelKey &desc, | ||||
| const lite::Primitive *primitive) { | const lite::Primitive *primitive) { | ||||
| MS_ASSERT(opParameter != nullptr); | |||||
| if (parameter == nullptr) { | |||||
| MS_LOG(ERROR) << "Input parameter is nullptr!"; | |||||
| return nullptr; | |||||
| } | |||||
| MS_ASSERT(desc.type == schema::PrimitiveType_Lstm); | MS_ASSERT(desc.type == schema::PrimitiveType_Lstm); | ||||
| auto *kernel = new (std::nothrow) LstmCPUKernel(opParameter, inputs, outputs, ctx, primitive); | |||||
| auto *kernel = new (std::nothrow) LstmCPUKernel(parameter, inputs, outputs, ctx, primitive); | |||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| MS_LOG(ERROR) << "kernel is nullptr."; | MS_LOG(ERROR) << "kernel is nullptr."; | ||||
| return nullptr; | return nullptr; | ||||
| @@ -184,8 +174,8 @@ kernel::LiteKernel *CpuLstmKernelCreator(const std::vector<lite::tensor::Tensor | |||||
| auto ret = kernel->Init(); | auto ret = kernel->Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| delete kernel; | delete kernel; | ||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); | |||||
| MS_LOG(ERROR) << "Init kernel failed, name: " << parameter->name_ << ", type: " | |||||
| << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(parameter->type_)); | |||||
| return nullptr; | return nullptr; | ||||
| } | } | ||||
| return kernel; | return kernel; | ||||
| @@ -31,12 +31,7 @@ class LstmCPUKernel : public LiteKernel { | |||||
| lstm_parm_ = reinterpret_cast<LstmParameter *>(op_parameter_); | lstm_parm_ = reinterpret_cast<LstmParameter *>(op_parameter_); | ||||
| } | } | ||||
| ~LstmCPUKernel() override { | |||||
| free(gate_buffer_); | |||||
| free(weight_i_ptr_); | |||||
| free(weight_h_ptr_); | |||||
| free(bias_ptr_); | |||||
| } | |||||
| ~LstmCPUKernel() override { FreeTmpBuffer(); } | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -47,11 +42,29 @@ class LstmCPUKernel : public LiteKernel { | |||||
| int InitWeightBias(); | int InitWeightBias(); | ||||
| private: | private: | ||||
| float *gate_buffer_; | |||||
| float *weight_i_ptr_; | |||||
| float *weight_h_ptr_; | |||||
| float *bias_ptr_; | |||||
| LstmParameter *lstm_parm_; | |||||
| void FreeTmpBuffer() { | |||||
| if (gate_buffer_ != nullptr) { | |||||
| free(gate_buffer_); | |||||
| gate_buffer_ = nullptr; | |||||
| } | |||||
| if (weight_i_ptr_ != nullptr) { | |||||
| free(weight_i_ptr_); | |||||
| weight_i_ptr_ = nullptr; | |||||
| } | |||||
| if (weight_h_ptr_ != nullptr) { | |||||
| free(weight_h_ptr_); | |||||
| weight_h_ptr_ = nullptr; | |||||
| } | |||||
| if (bias_ptr_ != nullptr) { | |||||
| free(bias_ptr_); | |||||
| bias_ptr_ = nullptr; | |||||
| } | |||||
| } | |||||
| float *gate_buffer_ = nullptr; | |||||
| float *weight_i_ptr_ = nullptr; | |||||
| float *weight_h_ptr_ = nullptr; | |||||
| float *bias_ptr_ = nullptr; | |||||
| LstmParameter *lstm_parm_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
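The Lstm creator above swaps MS_ASSERT(opParameter != nullptr) for an explicit guard. The distinction matters because assertion macros are typically compiled out of release builds, leaving a null parameter to be dereferenced. A minimal sketch under that assumption, with placeholder types:

```cpp
#include <iostream>
#include <new>

struct OpParameter { int type_ = 0; };
struct Kernel {
  explicit Kernel(OpParameter *p) : param_(p) {}
  OpParameter *param_;
};

// Hypothetical creator: the runtime check fails safely in every build type,
// whereas an assert() would disappear when NDEBUG is defined.
Kernel *CreateKernel(OpParameter *parameter) {
  if (parameter == nullptr) {
    std::cerr << "Input parameter is nullptr!\n";
    return nullptr;
  }
  return new (std::nothrow) Kernel(parameter);
}

int main() {
  Kernel *k = CreateKernel(nullptr);  // returns nullptr instead of crashing
  return k == nullptr ? 0 : 1;
}
```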
| @@ -29,23 +29,22 @@ using mindspore::schema::PrimitiveType_Pooling; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int PoolingCPUKernel::Init() { | int PoolingCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| return RET_OK; | |||||
| } | |||||
| auto ret = PoolingBaseCPUKernel::Init(); | auto ret = PoolingBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "PoolingBase Init failed."; | MS_LOG(ERROR) << "PoolingBase Init failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | } | ||||
| int PoolingCPUKernel::ReSize() { | int PoolingCPUKernel::ReSize() { | ||||
| auto ret = Init(); | |||||
| auto ret = PoolingBaseCPUKernel::ReSize(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Pooling resize init failed."; | |||||
| return RET_ERROR; | |||||
| MS_LOG(ERROR) << "PoolingBase ReSize fai1!ret: " << ret; | |||||
| return ret; | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -25,10 +25,15 @@ using mindspore::schema::PrimitiveType_TopK; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int TopKCPUKernel::Init() { | int TopKCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||||
| parameter->topk_node_list_ = nullptr; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int TopKCPUKernel::ReSize() { | |||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | ||||
| lite::tensor::Tensor *input = in_tensors_.at(0); | lite::tensor::Tensor *input = in_tensors_.at(0); | ||||
| parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | ||||
| @@ -37,6 +42,10 @@ int TopKCPUKernel::Init() { | |||||
| parameter->loop_num_ *= input->shape()[i]; | parameter->loop_num_ *= input->shape()[i]; | ||||
| } | } | ||||
| if (parameter->topk_node_list_ != nullptr) { | |||||
| free(parameter->topk_node_list_); | |||||
| parameter->topk_node_list_ = nullptr; | |||||
| } | |||||
| parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_); | parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_); | ||||
| if (parameter->topk_node_list_ == nullptr) { | if (parameter->topk_node_list_ == nullptr) { | ||||
| MS_LOG(ERROR) << "malloc fail."; | MS_LOG(ERROR) << "malloc fail."; | ||||
| @@ -45,8 +54,6 @@ int TopKCPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int TopKCPUKernel::ReSize() { return RET_OK; } | |||||
| int TopKCPUKernel::Run() { | int TopKCPUKernel::Run() { | ||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -65,7 +72,10 @@ kernel::LiteKernel *CpuTopKFp32KernelCreator(const std::vector<lite::tensor::Ten | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | const std::vector<lite::tensor::Tensor *> &outputs, OpParameter *parameter, | ||||
| const lite::Context *ctx, const KernelKey &desc, | const lite::Context *ctx, const KernelKey &desc, | ||||
| const lite::Primitive *primitive) { | const lite::Primitive *primitive) { | ||||
| MS_ASSERT(parameter != nullptr); | |||||
| if (parameter == nullptr) { | |||||
| MS_LOG(ERROR) << "input parameter is nullptr!"; | |||||
| return nullptr; | |||||
| } | |||||
| MS_ASSERT(desc.type == PrimitiveType_TopK); | MS_ASSERT(desc.type == PrimitiveType_TopK); | ||||
| auto *kernel = new (std::nothrow) TopKCPUKernel(parameter, inputs, outputs, ctx, primitive); | auto *kernel = new (std::nothrow) TopKCPUKernel(parameter, inputs, outputs, ctx, primitive); | ||||
| if (kernel == nullptr) { | if (kernel == nullptr) { | ||||
| @@ -24,10 +24,13 @@ using mindspore::schema::PrimitiveType_Unstack; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int UnstackCPUKernel::Init() { | int UnstackCPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int UnstackCPUKernel::ReSize() { | |||||
| auto input = in_tensors_.at(0); | auto input = in_tensors_.at(0); | ||||
| MS_ASSERT(input != nullptr); | MS_ASSERT(input != nullptr); | ||||
| size_t shape_size = input->shape().size(); | size_t shape_size = input->shape().size(); | ||||
| @@ -48,7 +51,10 @@ int UnstackCPUKernel::Init() { | |||||
| para->axis_dim_ = input->DimensionSize(i); | para->axis_dim_ = input->DimensionSize(i); | ||||
| } | } | ||||
| } | } | ||||
| if (output_addr_array_ != nullptr) { | |||||
| free(output_addr_array_); | |||||
| output_addr_array_ = nullptr; | |||||
| } | |||||
| output_addr_array_ = reinterpret_cast<float **>(malloc(sizeof(float *) * out_tensors_.size())); | output_addr_array_ = reinterpret_cast<float **>(malloc(sizeof(float *) * out_tensors_.size())); | ||||
| if (output_addr_array_ == nullptr) { | if (output_addr_array_ == nullptr) { | ||||
| MS_LOG(ERROR) << "Failed to malloc memory"; | MS_LOG(ERROR) << "Failed to malloc memory"; | ||||
| @@ -57,8 +63,6 @@ int UnstackCPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int UnstackCPUKernel::ReSize() { return RET_OK; } | |||||
| int UnstackCPUKernel::Run() { | int UnstackCPUKernel::Run() { | ||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -34,7 +34,7 @@ class UnstackCPUKernel : public LiteKernel { | |||||
| int Run() override; | int Run() override; | ||||
| private: | private: | ||||
| float **output_addr_array_; | |||||
| float **output_addr_array_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
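TopK and Unstack keep raw arrays whose size tracks the tensor shape. Note the ordering in TopK's Init(): topk_node_list_ is nulled *before* the early return, so the free-if-set check in a later ReSize() never reads an indeterminate pointer, and re-running ReSize() frees the old block before the new malloc. A compact sketch of that lifecycle (all names are placeholders):

```cpp
#include <cstdlib>

constexpr int RET_OK = 0;
constexpr int RET_ERROR = -1;

// Placeholder for an OpParameter-style struct holding a raw buffer.
struct TopkLikeParam {
  void *node_list_;  // indeterminate until Init() nulls it
};

struct TopkLikeKernel {
  explicit TopkLikeKernel(TopkLikeParam *p) : param_(p) {}
  TopkLikeParam *param_;
  bool shape_known_ = false;

  int Init() {
    param_->node_list_ = nullptr;  // make later null-checks well-defined
    if (!shape_known_) {
      return RET_OK;               // ReSize() runs once shapes are known
    }
    return ReSize();
  }

  int ReSize() {
    if (param_->node_list_ != nullptr) {  // drop the previous shape's buffer
      free(param_->node_list_);
      param_->node_list_ = nullptr;
    }
    param_->node_list_ = malloc(8 * sizeof(int));
    return param_->node_list_ != nullptr ? RET_OK : RET_ERROR;
  }
};
```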
| @@ -28,11 +28,6 @@ using mindspore::lite::RET_OK; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int ArithmeticSelfInt8CPUKernel::Init() { | int ArithmeticSelfInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| return RET_OK; | |||||
| } | |||||
| int ret = ReSize(); | |||||
| auto *input_tensor = in_tensors_.at(kInputIndex); | auto *input_tensor = in_tensors_.at(kInputIndex); | ||||
| auto in_quant_args = input_tensor->GetQuantParams(); | auto in_quant_args = input_tensor->GetQuantParams(); | ||||
| para_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; | para_->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; | ||||
| @@ -57,7 +52,10 @@ int ArithmeticSelfInt8CPUKernel::Init() { | |||||
| para_->quant_arg_.shift_right_ = right_shift > 0 ? right_shift : 0; | para_->quant_arg_.shift_right_ = right_shift > 0 ? right_shift : 0; | ||||
| } | } | ||||
| return ret; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | } | ||||
| int ArithmeticSelfInt8CPUKernel::ReSize() { | int ArithmeticSelfInt8CPUKernel::ReSize() { | ||||
| @@ -26,10 +26,13 @@ using mindspore::schema::PrimitiveType_BiasAdd; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int BiasAddInt8CPUKernel::Init() { | int BiasAddInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int BiasAddInt8CPUKernel::ReSize() { | |||||
| auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | auto bias_param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | ||||
| auto dims = in_tensors_[0]->shape(); | auto dims = in_tensors_[0]->shape(); | ||||
| bias_param->ndim_ = dims.size(); | bias_param->ndim_ = dims.size(); | ||||
| @@ -39,11 +42,9 @@ int BiasAddInt8CPUKernel::Init() { | |||||
| bias_param->out_shape_[i] = dims[i]; | bias_param->out_shape_[i] = dims[i]; | ||||
| } | } | ||||
| bias_param->in_shape1_[3] = dims[3]; | bias_param->in_shape1_[3] = dims[3]; | ||||
| return NNACL_OK; | |||||
| return RET_OK; | |||||
| } | } | ||||
| int BiasAddInt8CPUKernel::ReSize() { return NNACL_OK; } | |||||
| int BiasAddInt8CPUKernel::Run() { | int BiasAddInt8CPUKernel::Run() { | ||||
| auto ret = Prepare(); | auto ret = Prepare(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -43,28 +43,36 @@ void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParamete | |||||
| free(tmp_addr); | free(tmp_addr); | ||||
| } | } | ||||
| Convolution3x3Int8CPUKernel::~Convolution3x3Int8CPUKernel() { | |||||
| void Convolution3x3Int8CPUKernel::FreeTmpBuffer() { | |||||
| if (transformed_filter_addr_ != nullptr) { | if (transformed_filter_addr_ != nullptr) { | ||||
| free(transformed_filter_addr_); | free(transformed_filter_addr_); | ||||
| transformed_filter_addr_ = nullptr; | |||||
| } | } | ||||
| if (input_data_ != nullptr) { | if (input_data_ != nullptr) { | ||||
| free(input_data_); | free(input_data_); | ||||
| input_data_ = nullptr; | |||||
| } | } | ||||
| if (tile_buffer_ != nullptr) { | if (tile_buffer_ != nullptr) { | ||||
| free(tile_buffer_); | free(tile_buffer_); | ||||
| tile_buffer_ = nullptr; | |||||
| } | } | ||||
| if (block_unit_buffer_ != nullptr) { | if (block_unit_buffer_ != nullptr) { | ||||
| free(block_unit_buffer_); | free(block_unit_buffer_); | ||||
| block_unit_buffer_ = nullptr; | |||||
| } | } | ||||
| if (tmp_dst_buffer_ != nullptr) { | if (tmp_dst_buffer_ != nullptr) { | ||||
| free(tmp_dst_buffer_); | free(tmp_dst_buffer_); | ||||
| tmp_dst_buffer_ = nullptr; | |||||
| } | } | ||||
| if (tmp_out_ != nullptr) { | if (tmp_out_ != nullptr) { | ||||
| free(tmp_out_); | free(tmp_out_); | ||||
| tmp_out_ = nullptr; | |||||
| } | } | ||||
| FreeQuantParam(); | FreeQuantParam(); | ||||
| } | } | ||||
| Convolution3x3Int8CPUKernel::~Convolution3x3Int8CPUKernel() { FreeTmpBuffer(); } | |||||
| int Convolution3x3Int8CPUKernel::InitWeightBias() { | int Convolution3x3Int8CPUKernel::InitWeightBias() { | ||||
| auto input_channel = conv_param_->input_channel_; | auto input_channel = conv_param_->input_channel_; | ||||
| auto output_channel = conv_param_->output_channel_; | auto output_channel = conv_param_->output_channel_; | ||||
| @@ -161,10 +169,15 @@ void Convolution3x3Int8CPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int Convolution3x3Int8CPUKernel::Init() { | int Convolution3x3Int8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int Convolution3x3Int8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| @@ -191,37 +204,6 @@ int Convolution3x3Int8CPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int Convolution3x3Int8CPUKernel::ReSize() { | |||||
| if (input_data_ != nullptr) { | |||||
| free(input_data_); | |||||
| } | |||||
| if (tile_buffer_ != nullptr) { | |||||
| free(tile_buffer_); | |||||
| } | |||||
| if (block_unit_buffer_ != nullptr) { | |||||
| free(block_unit_buffer_); | |||||
| } | |||||
| if (tmp_dst_buffer_ != nullptr) { | |||||
| free(tmp_dst_buffer_); | |||||
| } | |||||
| if (tmp_out_ != nullptr) { | |||||
| free(tmp_out_); | |||||
| } | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // init tmp input, output | |||||
| ret = InitTmpBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int Convolution3x3Int8CPUKernel::RunImpl(int task_id) { | int Convolution3x3Int8CPUKernel::RunImpl(int task_id) { | ||||
| auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->Data()); | auto output_addr = reinterpret_cast<int8_t *>(out_tensors_.at(kOutputIndex)->Data()); | ||||
| Conv3x3Int8(input_data_, transformed_filter_addr_, reinterpret_cast<int32_t *>(bias_data_), output_addr, tile_buffer_, | Conv3x3Int8(input_data_, transformed_filter_addr_, reinterpret_cast<int32_t *>(bias_data_), output_addr, tile_buffer_, | ||||
| @@ -41,12 +41,13 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| void ConfigInputOutput(); | void ConfigInputOutput(); | ||||
| private: | private: | ||||
| int16_t *transformed_filter_addr_; | |||||
| int16_t *input_data_; | |||||
| int16_t *tile_buffer_; | |||||
| int16_t *block_unit_buffer_; | |||||
| int32_t *tmp_dst_buffer_; | |||||
| int8_t *tmp_out_; | |||||
| void FreeTmpBuffer(); | |||||
| int16_t *transformed_filter_addr_ = nullptr; | |||||
| int16_t *input_data_ = nullptr; | |||||
| int16_t *tile_buffer_ = nullptr; | |||||
| int16_t *block_unit_buffer_ = nullptr; | |||||
| int32_t *tmp_dst_buffer_ = nullptr; | |||||
| int8_t *tmp_out_ = nullptr; | |||||
| }; | }; | ||||
| void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | void ProcessFilterUint8(int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
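The header hunks here (and throughout the patch) add in-class `= nullptr` initializers to every raw buffer member. The point is that a kernel whose Init() bailed out or failed still reaches the destructor, and thus FreeTmpBuffer(), with well-defined null pointers instead of indeterminate ones. A sketch with a hypothetical class:

```cpp
#include <cstdlib>

// Sketch: default member initializers keep the cleanup path defined even
// when the kernel is destroyed before any buffer was allocated.
class BufferedKernel {
 public:
  ~BufferedKernel() { FreeTmpBuffer(); }  // safe even if InitTmpBuffer() never ran

  int InitTmpBuffer() {
    tmp_ = static_cast<int16_t *>(malloc(64 * sizeof(int16_t)));
    return tmp_ != nullptr ? 0 : -1;
  }

 private:
  void FreeTmpBuffer() {
    if (tmp_ != nullptr) {
      free(tmp_);
      tmp_ = nullptr;
    }
  }
  int16_t *tmp_ = nullptr;  // previously `int16_t *tmp_;` — indeterminate
};

int main() {
  BufferedKernel k;  // destroyed without Init: FreeTmpBuffer() sees nullptr
  return 0;
}
```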
| @@ -28,8 +28,12 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DepthwiseConv2D; | using mindspore::schema::PrimitiveType_DepthwiseConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||||
| delete sliding; | |||||
| void ConvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (sliding != nullptr) { | |||||
| delete sliding; | |||||
| sliding = nullptr; | |||||
| } | |||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| @@ -46,6 +50,8 @@ ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { | |||||
| } | } | ||||
| } | } | ||||
| ConvolutionDepthwiseInt8CPUKernel::~ConvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); } | |||||
| int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { | ||||
| // init weight, int8 -> int16 | // init weight, int8 -> int16 | ||||
| // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | // o, h, w, i -> o/8, h, w, i, 8; o == group, i == 1 | ||||
| @@ -99,10 +105,15 @@ int ConvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int ConvolutionDepthwiseInt8CPUKernel::Init() { | int ConvolutionDepthwiseInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| // conv base init | // conv base init | ||||
| ConvolutionBaseCPUKernel::Init(); | ConvolutionBaseCPUKernel::Init(); | ||||
| @@ -132,35 +143,6 @@ int ConvolutionDepthwiseInt8CPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int ConvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (need_align_) { | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| // conv base init | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| // init sliding window param | |||||
| InitSlidingParamConvDw(sliding, conv_param_, C4NUM); | |||||
| // init quant param | |||||
| ConvolutionBaseCPUKernel::SetQuantParam(); | |||||
| auto ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Depthwise int8 ReSize error!"; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int ConvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | int ConvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | ||||
| ConvDwInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_, | ConvDwInt8(packed_output_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), conv_param_, | ||||
| sliding, task_id); | sliding, task_id); | ||||
| @@ -40,10 +40,11 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| SlidingWindowParam *sliding; | |||||
| int16_t *packed_weight_; | |||||
| int16_t *packed_input_; | |||||
| int8_t *packed_output_; | |||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding = nullptr; | |||||
| int16_t *packed_weight_ = nullptr; | |||||
| int16_t *packed_input_ = nullptr; | |||||
| int8_t *packed_output_ = nullptr; | |||||
| bool need_align_ = false; | bool need_align_ = false; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -293,46 +293,10 @@ void ConvolutionInt8CPUKernel::ConfigInputOutput() { | |||||
| } | } | ||||
| int ConvolutionInt8CPUKernel::Init() { | int ConvolutionInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // config input output | |||||
| ConfigInputOutput(); | |||||
| CheckSupportOptimize(); | |||||
| ret = SetQuantParam(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Set quant param failed."; | |||||
| return ret; | |||||
| } | |||||
| // init for opt | |||||
| if (support_optimize_) { | |||||
| ret = InitOpt(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Initialization for optimized int8 conv failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| // init for situation that not support sdot | |||||
| ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // init tmp input, output | |||||
| ret = InitTmpBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init tmp buffer failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | |||||
| return ReSize(); | |||||
| } | } | ||||
| int ConvolutionInt8CPUKernel::InitOpt() { | int ConvolutionInt8CPUKernel::InitOpt() { | ||||
| @@ -351,32 +315,37 @@ int ConvolutionInt8CPUKernel::InitOpt() { | |||||
| } | } | ||||
| int ConvolutionInt8CPUKernel::ReSize() { | int ConvolutionInt8CPUKernel::ReSize() { | ||||
| if (packed_input_ != nullptr) { | |||||
| free(packed_input_); | |||||
| } | |||||
| if (input_sum_ != nullptr) { | |||||
| free(input_sum_); | |||||
| } | |||||
| if (tmp_dst_ != nullptr) { | |||||
| free(tmp_dst_); | |||||
| } | |||||
| if (tmp_out_ != nullptr) { | |||||
| free(tmp_out_); | |||||
| } | |||||
| FreeTmpBuffer(); | |||||
| auto ret = ConvolutionBaseCPUKernel::Init(); | auto ret = ConvolutionBaseCPUKernel::Init(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "ConvolutionBase init failed."; | MS_LOG(ERROR) << "ConvolutionBase init failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| // config input output | |||||
| ConfigInputOutput(); | |||||
| CheckSupportOptimize(); | |||||
| ret = SetQuantParam(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Set quant param failed."; | |||||
| return ret; | |||||
| } | |||||
| // init for opt | |||||
| if (support_optimize_) { | if (support_optimize_) { | ||||
| ret = InitTmpBufferOpt(); | |||||
| ret = InitOpt(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Init tmp buffer for opt failed."; | |||||
| MS_LOG(ERROR) << "Initialization for optimized int8 conv failed."; | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| // init for situation that not support sdot | |||||
| ret = InitWeightBias(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init weight bias failed."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| // init tmp input, output | // init tmp input, output | ||||
| ret = InitTmpBuffer(); | ret = InitTmpBuffer(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -30,38 +30,44 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | ||||
| const lite::Primitive *primitive) | const lite::Primitive *primitive) | ||||
| : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~ConvolutionInt8CPUKernel() override { | |||||
| ~ConvolutionInt8CPUKernel() override { FreeTmpBuffer(); } | |||||
| int Init() override; | |||||
| int ReSize() override; | |||||
| int Run() override; | |||||
| int RunImpl(int task_id); | |||||
| void CheckSupportOptimize(); | |||||
| int InitOpt(); | |||||
| int InitWeightBiasOpt(); | |||||
| int InitTmpBufferOpt(); | |||||
| int InitWeightBias(); | |||||
| int InitTmpBuffer(); | |||||
| void ConfigInputOutput(); | |||||
| private: | |||||
| void FreeTmpBuffer() { | |||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| free(packed_weight_); | free(packed_weight_); | ||||
| packed_weight_ = nullptr; | |||||
| } | } | ||||
| if (packed_input_ != nullptr) { | if (packed_input_ != nullptr) { | ||||
| free(packed_input_); | free(packed_input_); | ||||
| packed_input_ = nullptr; | |||||
| } | } | ||||
| if (input_sum_ != nullptr) { | if (input_sum_ != nullptr) { | ||||
| free(input_sum_); | free(input_sum_); | ||||
| input_sum_ = nullptr; | |||||
| } | } | ||||
| if (tmp_dst_ != nullptr) { | if (tmp_dst_ != nullptr) { | ||||
| free(tmp_dst_); | free(tmp_dst_); | ||||
| tmp_dst_ = nullptr; | |||||
| } | } | ||||
| if (tmp_out_ != nullptr) { | if (tmp_out_ != nullptr) { | ||||
| free(tmp_out_); | free(tmp_out_); | ||||
| tmp_out_ = nullptr; | |||||
| } | } | ||||
| FreeQuantParam(); | FreeQuantParam(); | ||||
| }; | |||||
| int Init() override; | |||||
| int ReSize() override; | |||||
| int Run() override; | |||||
| int RunImpl(int task_id); | |||||
| void CheckSupportOptimize(); | |||||
| int InitOpt(); | |||||
| int InitWeightBiasOpt(); | |||||
| int InitTmpBufferOpt(); | |||||
| int InitWeightBias(); | |||||
| int InitTmpBuffer(); | |||||
| void ConfigInputOutput(); | |||||
| private: | |||||
| } | |||||
| bool support_optimize_ = true; | bool support_optimize_ = true; | ||||
| int8_t *packed_weight_ = nullptr; | int8_t *packed_weight_ = nullptr; | ||||
| int8_t *packed_input_ = nullptr; | int8_t *packed_input_ = nullptr; | ||||
| @@ -28,8 +28,13 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | using mindspore::schema::PrimitiveType_DeDepthwiseConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { | |||||
| delete sliding; | |||||
| DeconvolutionDepthwiseInt8CPUKernel::~DeconvolutionDepthwiseInt8CPUKernel() { FreeTmpBuffer(); } | |||||
| void DeconvolutionDepthwiseInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (sliding != nullptr) { | |||||
| delete sliding; | |||||
| sliding = nullptr; | |||||
| } | |||||
| if (packed_weight_ != nullptr) { | if (packed_weight_ != nullptr) { | ||||
| delete packed_weight_; | delete packed_weight_; | ||||
| packed_weight_ = nullptr; | packed_weight_ = nullptr; | ||||
| @@ -137,10 +142,15 @@ int DeconvolutionDepthwiseInt8CPUKernel::InitBuffer() { | |||||
| } | } | ||||
| int DeconvolutionDepthwiseInt8CPUKernel::Init() { | int DeconvolutionDepthwiseInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| sliding = new SlidingWindowParam; | sliding = new SlidingWindowParam; | ||||
| InitSlideParam(); | InitSlideParam(); | ||||
| @@ -169,35 +179,6 @@ int DeconvolutionDepthwiseInt8CPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int DeconvolutionDepthwiseInt8CPUKernel::ReSize() { | |||||
| if (packed_input_ != nullptr) { | |||||
| delete packed_input_; | |||||
| packed_input_ = nullptr; | |||||
| } | |||||
| if (need_align_) { | |||||
| if (packed_output_ != nullptr) { | |||||
| delete packed_output_; | |||||
| packed_output_ = nullptr; | |||||
| } | |||||
| } | |||||
| if (output_buffer_ != nullptr) { | |||||
| delete output_buffer_; | |||||
| output_buffer_ = nullptr; | |||||
| } | |||||
| InitSlideParam(); | |||||
| // conv base init | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| auto ret = InitBuffer(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Deconv Depthwise int8 InitBuffer error!"; | |||||
| return ret; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | int DeconvolutionDepthwiseInt8CPUKernel::Execute(int task_id) { | ||||
| DeconvDwInt8(packed_output_, output_buffer_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | DeconvDwInt8(packed_output_, output_buffer_, packed_input_, packed_weight_, reinterpret_cast<int32_t *>(bias_data_), | ||||
| conv_param_, sliding, task_id); | conv_param_, sliding, task_id); | ||||
| @@ -41,11 +41,12 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int Execute(int task_id); | int Execute(int task_id); | ||||
| private: | private: | ||||
| SlidingWindowParam *sliding; | |||||
| int16_t *packed_weight_; | |||||
| int16_t *packed_input_; | |||||
| int8_t *packed_output_; | |||||
| int32_t *output_buffer_; | |||||
| void FreeTmpBuffer(); | |||||
| SlidingWindowParam *sliding = nullptr; | |||||
| int16_t *packed_weight_ = nullptr; | |||||
| int16_t *packed_input_ = nullptr; | |||||
| int8_t *packed_output_ = nullptr; | |||||
| int32_t *output_buffer_ = nullptr; | |||||
| bool need_align_ = false; | bool need_align_ = false; | ||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -27,7 +27,9 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_DeConv2D; | using mindspore::schema::PrimitiveType_DeConv2D; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { | |||||
| DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { FreeTmpBuffer(); } | |||||
| void DeConvInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (weight_ptr_ != nullptr) { | if (weight_ptr_ != nullptr) { | ||||
| free(weight_ptr_); | free(weight_ptr_); | ||||
| weight_ptr_ = nullptr; | weight_ptr_ = nullptr; | ||||
| @@ -47,7 +49,35 @@ DeConvInt8CPUKernel::~DeConvInt8CPUKernel() { | |||||
| ConvolutionBaseCPUKernel::FreeQuantParam(); | ConvolutionBaseCPUKernel::FreeQuantParam(); | ||||
| } | } | ||||
| int DeConvInt8CPUKernel::ReSize() { return RET_OK; } | |||||
| int DeConvInt8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| int error_code = ConvolutionBaseCPUKernel::SetQuantParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 SetQuantParam error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitParam error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitBiasWeight(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitBiasWeight error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitData(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitData error!"; | |||||
| return error_code; | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int DeConvInt8CPUKernel::InitParam() { | int DeConvInt8CPUKernel::InitParam() { | ||||
| fc_param_ = new MatMulParameter(); | fc_param_ = new MatMulParameter(); | ||||
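The new `ReSize` body above is a linear chain of step/log/propagate blocks. Purely as a restructuring sketch (not code from the patch), the same chain can be driven by a small helper so every step gets identical error handling:

```cpp
#include <functional>
#include <initializer_list>

constexpr int RET_OK = 0;

// Runs init steps in order and surfaces the first non-OK code unchanged,
// mirroring the SetQuantParam / InitParam / InitBiasWeight / InitData chain.
inline int RunInitSteps(std::initializer_list<std::function<int()>> steps) {
  for (const auto &step : steps) {
    int ret = step();
    if (ret != RET_OK) {
      return ret;  // keep the original code so logs show the real cause
    }
  }
  return RET_OK;
}
```

Usage would be along the lines of `return RunInitSteps({[&] { return SetQuantParam(); }, [&] { return InitParam(); }});`, at the cost of the per-step log messages unless the helper also takes a name for each step.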
| @@ -115,35 +145,10 @@ int DeConvInt8CPUKernel::InitData() { | |||||
| } | } | ||||
| int DeConvInt8CPUKernel::Init() { | int DeConvInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| ConvolutionBaseCPUKernel::Init(); | |||||
| int error_code = ConvolutionBaseCPUKernel::SetQuantParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 SetQuantParam error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitParam(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitParam error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitBiasWeight(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitBiasWeight error!"; | |||||
| return error_code; | |||||
| } | |||||
| error_code = InitData(); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "deconv int8 InitData error!"; | |||||
| return error_code; | |||||
| } | |||||
| return RET_OK; | |||||
| return ReSize(); | |||||
| } | } | ||||
| int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | int DeConvInt8Run(int task_id, LiteParallelGroupEnv *penv, void *cdata) { | ||||
| @@ -51,12 +51,13 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { | |||||
| int InitBiasWeight(); | int InitBiasWeight(); | ||||
| private: | private: | ||||
| MatMulParameter *fc_param_; | |||||
| int8_t *weight_ptr_; | |||||
| int8_t *input_ptr_; /* record c8 input*/ | |||||
| int32_t *tmp_buffer_; /* record matmul result */ | |||||
| int32_t *tmp_output_; /* record post c8 result */ | |||||
| int8_t *output_ptr_; | |||||
| void FreeTmpBuffer(); | |||||
| MatMulParameter *fc_param_ = nullptr; | |||||
| int8_t *weight_ptr_ = nullptr; | |||||
| int8_t *input_ptr_ = nullptr; /* record c8 input*/ | |||||
| int32_t *tmp_buffer_ = nullptr; /* record matmul result */ | |||||
| int32_t *tmp_output_ = nullptr; /* record post c8 result */ | |||||
| int8_t *output_ptr_ = nullptr; | |||||
| size_t thread_count_; | size_t thread_count_; | ||||
| size_t thread_stride_; | size_t thread_stride_; | ||||
| }; | }; | ||||
| @@ -25,10 +25,14 @@ using mindspore::lite::RET_OK; | |||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| int FullconnectionInt8CPUKernel::Init() { | int FullconnectionInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int FullconnectionInt8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| fc_param_->row_ = (in_tensors_[0]->shape())[0]; | fc_param_->row_ = (in_tensors_[0]->shape())[0]; | ||||
| fc_param_->col_ = (in_tensors_[1]->shape())[0]; | fc_param_->col_ = (in_tensors_[1]->shape())[0]; | ||||
| fc_param_->deep_ = (in_tensors_[1]->shape())[1]; | fc_param_->deep_ = (in_tensors_[1]->shape())[1]; | ||||
| @@ -92,8 +96,6 @@ int FullconnectionInt8CPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int FullconnectionInt8CPUKernel::ReSize() { return RET_OK; } | |||||
| int FullconnectionInt8CPUKernel::RunImpl(int task_id) { | int FullconnectionInt8CPUKernel::RunImpl(int task_id) { | ||||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(fc_param_->col_8_, 8) - task_id * thread_stride_); | int cur_oc = MSMIN(thread_stride_, UP_DIV(fc_param_->col_8_, 8) - task_id * thread_stride_); | ||||
| if (cur_oc <= 0) { | if (cur_oc <= 0) { | ||||
| @@ -31,11 +31,7 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const Context *ctx, | ||||
| const lite::Primitive *primitive) | const lite::Primitive *primitive) | ||||
| : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : FullconnectionBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~FullconnectionInt8CPUKernel() override { | |||||
| ctx_->allocator->Free(a_c8_ptr_); | |||||
| ctx_->allocator->Free(b_r8_ptr_); | |||||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||||
| } | |||||
| ~FullconnectionInt8CPUKernel() override { FreeTmpBuffer(); } | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -43,11 +39,29 @@ class FullconnectionInt8CPUKernel : public FullconnectionBaseCPUKernel { | |||||
| int RunImpl(int task_id); | int RunImpl(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer() { | |||||
| if (a_c8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(a_c8_ptr_); | |||||
| a_c8_ptr_ = nullptr; | |||||
| } | |||||
| if (b_r8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(b_r8_ptr_); | |||||
| b_r8_ptr_ = nullptr; | |||||
| } | |||||
| if (c_r8x8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||||
| c_r8x8_ptr_ = nullptr; | |||||
| } | |||||
| if (bias_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(bias_ptr_); | |||||
| bias_ptr_ = nullptr; | |||||
| } | |||||
| } | |||||
| MatmulQuantArg quant_params_; | MatmulQuantArg quant_params_; | ||||
| int8_t *a_c8_ptr_; | |||||
| int8_t *b_r8_ptr_; | |||||
| int *c_r8x8_ptr_; | |||||
| int *bias_ptr_; | |||||
| int8_t *a_c8_ptr_ = nullptr; | |||||
| int8_t *b_r8_ptr_ = nullptr; | |||||
| int *c_r8x8_ptr_ = nullptr; | |||||
| int *bias_ptr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
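One detail worth keeping in mind here: these buffers come from `ctx_->allocator`, so they must be returned through the same allocator's `Free`, never plain `free()`. An RAII wrapper is one way to guarantee that pairing while keeping the null-after-free behaviour; a sketch with a stand-in allocator interface (the real one is richer):

```cpp
#include <cstddef>
#include <cstdlib>

// Stand-in for the session allocator; only Malloc/Free are assumed.
struct Allocator {
  void *Malloc(size_t size) { return malloc(size); }
  void Free(void *ptr) { free(ptr); }
};

// RAII alternative to a hand-written FreeTmpBuffer: the buffer is always
// returned to the allocator that produced it, at most once.
class ScopedBuffer {
 public:
  ScopedBuffer(Allocator *alloc, size_t size)
      : alloc_(alloc), ptr_(alloc->Malloc(size)) {}
  ~ScopedBuffer() { Reset(); }
  ScopedBuffer(const ScopedBuffer &) = delete;
  ScopedBuffer &operator=(const ScopedBuffer &) = delete;

  void Reset() {
    if (ptr_ != nullptr) {
      alloc_->Free(ptr_);
      ptr_ = nullptr;  // idempotent, like the patch's null-after-Free
    }
  }
  void *get() const { return ptr_; }

 private:
  Allocator *alloc_;
  void *ptr_;
};
```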
| @@ -24,17 +24,17 @@ using mindspore::lite::RET_MEMORY_FAILED; | |||||
| using mindspore::lite::RET_OK; | using mindspore::lite::RET_OK; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| MatmulInt8CPUKernel::~MatmulInt8CPUKernel() { | |||||
| ctx_->allocator->Free(a_c8_ptr_); | |||||
| ctx_->allocator->Free(b_r8_ptr_); | |||||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||||
| } | |||||
| MatmulInt8CPUKernel::~MatmulInt8CPUKernel() { FreeTmpBuffer(); } | |||||
| int MatmulInt8CPUKernel::Init() { | int MatmulInt8CPUKernel::Init() { | ||||
| if (context_->infer_shape_interrupt_ && !context_->running_) { | |||||
| set_need_reinit(); | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| return ReSize(); | |||||
| } | |||||
| int MatmulInt8CPUKernel::ReSize() { | |||||
| FreeTmpBuffer(); | |||||
| int batch = 1; | int batch = 1; | ||||
| auto x_shape = in_tensors_[0]->shape(); | auto x_shape = in_tensors_[0]->shape(); | ||||
| auto o_shape = out_tensors_[0]->shape(); | auto o_shape = out_tensors_[0]->shape(); | ||||
| @@ -88,8 +88,6 @@ int MatmulInt8CPUKernel::Init() { | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int MatmulInt8CPUKernel::ReSize() { return RET_OK; } | |||||
| int MatmulInt8CPUKernel::RunImpl(int task_id) { | int MatmulInt8CPUKernel::RunImpl(int task_id) { | ||||
| int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_); | int cur_oc = MSMIN(thread_stride_, UP_DIV(params_->col_8_, 8) - task_id * thread_stride_); | ||||
| if (cur_oc <= 0) { | if (cur_oc <= 0) { | ||||
| @@ -38,10 +38,24 @@ class MatmulInt8CPUKernel : public MatmulBaseCPUKernel { | |||||
| int RunImpl(int task_id); | int RunImpl(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer() { | |||||
| if (a_c8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(a_c8_ptr_); | |||||
| a_c8_ptr_ = nullptr; | |||||
| } | |||||
| if (b_r8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(b_r8_ptr_); | |||||
| b_r8_ptr_ = nullptr; | |||||
| } | |||||
| if (c_r8x8_ptr_ != nullptr) { | |||||
| ctx_->allocator->Free(c_r8x8_ptr_); | |||||
| c_r8x8_ptr_ = nullptr; | |||||
| } | |||||
| } | |||||
| MatmulQuantArg quant_params_; | MatmulQuantArg quant_params_; | ||||
| int8_t *a_c8_ptr_; | |||||
| int8_t *b_r8_ptr_; | |||||
| int *c_r8x8_ptr_; | |||||
| int8_t *a_c8_ptr_ = nullptr; | |||||
| int8_t *b_r8_ptr_ = nullptr; | |||||
| int *c_r8x8_ptr_ = nullptr; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -40,21 +40,24 @@ int PoolingInt8CPUKernel::Init() { | |||||
| MS_LOG(ERROR) << "Set pooling quant param failed."; | MS_LOG(ERROR) << "Set pooling quant param failed."; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | |||||
| if (!InferShapeDone()) { | |||||
| return RET_OK; | |||||
| } | |||||
| return ReSize(); | |||||
| } | } | ||||
| int PoolingInt8CPUKernel::ReSize() { | int PoolingInt8CPUKernel::ReSize() { | ||||
| FreeQuantParam(); | FreeQuantParam(); | ||||
| auto ret = PoolingBaseCPUKernel::Init(); | |||||
| auto ret = PoolingBaseCPUKernel::ReSize(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "PoolingBase Init failed."; | MS_LOG(ERROR) << "PoolingBase Init failed."; | ||||
| return RET_ERROR; | |||||
| return ret; | |||||
| } | } | ||||
| SetQuantParam(); | |||||
| ret = SetQuantParam(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "Set pooling quant param failed."; | MS_LOG(ERROR) << "Set pooling quant param failed."; | ||||
| return RET_ERROR; | |||||
| return ret; | |||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
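Two small but real fixes land in this hunk: the result of `SetQuantParam()` is captured instead of silently dropped, and `ReSize` now propagates the underlying code rather than flattening everything to `RET_ERROR`. Had the status-returning helpers been marked `[[nodiscard]]` (C++17), the dropped result would have been a compiler warning; a hypothetical sketch:

```cpp
constexpr int RET_OK = 0;

// Hypothetical: the real SetQuantParam is a kernel method, not this stub.
[[nodiscard]] int SetQuantParam() { return RET_OK; }

int ReSizeChecked() {
  // SetQuantParam();          // would now warn: discarded return value
  int ret = SetQuantParam();   // the fixed form: capture and test
  if (ret != RET_OK) {
    return ret;                // propagate the specific code, not RET_ERROR
  }
  return RET_OK;
}
```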
| @@ -45,6 +45,33 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<tensor::Tensor *> | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| int Scheduler::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) { | |||||
| for (size_t i = 0; i < kernels.size(); ++i) { | |||||
| if (kernels[i] == nullptr) { | |||||
| MS_LOG(ERROR) << "input kernel is nullptr!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| auto primitive = const_cast<lite::Primitive *>(kernels[i]->GetPrimitive()); | |||||
| if (primitive == nullptr) { | |||||
| MS_LOG(ERROR) << "kernel(" << kernels[i]->name() << ")'s primitive is nullptr!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| std::vector<tensor::Tensor *> &inputs = kernels[i]->in_tensors(); | |||||
| std::vector<tensor::Tensor *> &outputs = kernels[i]->out_tensors(); | |||||
| auto ret = primitive->InferShape(inputs, outputs); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "InferShape failed, name: " << kernels[i]->name() << ", ret = " << ret; | |||||
| return ret; | |||||
| } | |||||
| ret = kernels[i]->ReSize(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "kernel " << kernels[i]->name() << " resize fail!ret = " << ret; | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return RET_OK; | |||||
| } | |||||
| int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | ||||
| std::vector<kernel::LiteKernel *> *kernels) { | std::vector<kernel::LiteKernel *> *kernels) { | ||||
| MS_EXCEPTION_IF_NULL(model); | MS_EXCEPTION_IF_NULL(model); | ||||
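`ReSizeKernels` walks the scheduled kernels, re-deriving shapes before touching buffers and aborting on the first kernel that cannot infer or reallocate. Since the scheduler emits kernels in execution order, each kernel presumably sees its producers' already-updated output shapes. A self-contained sketch of the loop with stand-in types (the real `LiteKernel`/`Primitive` wiring is richer):

```cpp
#include <vector>

constexpr int RET_OK = 0;
constexpr int RET_ERROR = -1;

struct Tensor { std::vector<int> shape; };

// Stand-in kernel: only the two calls the patch performs per kernel are
// modelled, shape inference first, buffer reallocation second.
struct Kernel {
  std::vector<Tensor *> inputs;
  std::vector<Tensor *> outputs;
  int InferShape() {  // toy rule: outputs mirror the first input's shape
    if (inputs.empty()) return RET_ERROR;
    for (auto *out : outputs) out->shape = inputs.front()->shape;
    return RET_OK;
  }
  int ReSize() { return RET_OK; }  // would rebuild packed buffers here
};

// Mirrors Scheduler::ReSizeKernels: fail fast and surface the step's code.
int ReSizeKernels(const std::vector<Kernel *> &kernels) {
  for (auto *kernel : kernels) {
    if (kernel == nullptr) return RET_ERROR;
    if (int ret = kernel->InferShape(); ret != RET_OK) return ret;
    if (int ret = kernel->ReSize(); ret != RET_OK) return ret;
  }
  return RET_OK;
}
```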
| @@ -29,6 +29,8 @@ class Scheduler { | |||||
| int Schedule(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | int Schedule(const lite::Model *model, std::vector<tensor::Tensor *> *tensors, | ||||
| std::vector<kernel::LiteKernel *> *kernels); | std::vector<kernel::LiteKernel *> *kernels); | ||||
| int ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels); | |||||
| protected: | protected: | ||||
| kernel::LiteKernel *ScheduleNode(const std::vector<tensor::Tensor *> &in_tensors, | kernel::LiteKernel *ScheduleNode(const std::vector<tensor::Tensor *> &in_tensors, | ||||
| const std::vector<tensor::Tensor *> &out_tensors, const lite::Primitive *primitive); | const std::vector<tensor::Tensor *> &out_tensors, const lite::Primitive *primitive); | ||||