Merge pull request !4780 from sunsuodong/move_malloc_to_run (tags/v0.7.0-beta)
| @@ -61,19 +61,6 @@ int ArgMinMaxBaseCPUKernel::ReSize() { | |||||
| return RET_PARAM_INVALID; | return RET_PARAM_INVALID; | ||||
| } | } | ||||
| param->topk_ = MSMIN(param->topk_, in_shape[axis]); | param->topk_ = MSMIN(param->topk_, in_shape[axis]); | ||||
| if (param->topk_ > 1 || param->keep_dims_) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| param->arg_elements_ = | |||||
| reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * in_shape[axis])); | |||||
| data_from_allocator_ = true; | |||||
| } else { | |||||
| param->arg_elements_ = reinterpret_cast<ArgElement *>(malloc(sizeof(ArgElement) * in_shape[axis])); | |||||
| } | |||||
| if (param->arg_elements_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc memroy fail!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size()); | ComputeStrides(in_shape.data(), param->in_strides_, in_shape.size()); | ||||
| auto out_shape = out_tensors_.at(0)->shape(); | auto out_shape = out_tensors_.at(0)->shape(); | ||||
| ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size()); | ComputeStrides(out_shape.data(), param->out_strides_, out_shape.size()); | ||||
| @@ -81,28 +68,24 @@ int ArgMinMaxBaseCPUKernel::ReSize() { | |||||
| } | } | ||||
| int ArgMinMaxBaseCPUKernel::Run() { | int ArgMinMaxBaseCPUKernel::Run() { | ||||
| auto input = in_tensors_.at(0); | |||||
| auto input_data = reinterpret_cast<const void *>(in_tensors_.at(0)->Data()); | |||||
| auto input_data = in_tensors_.at(0)->Data(); | |||||
| auto output_data = out_tensors_.at(0)->Data(); | auto output_data = out_tensors_.at(0)->Data(); | ||||
| auto shape = input->shape().data(); | |||||
| auto shape = in_tensors_.at(0)->shape().data(); | |||||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | ||||
| ArgMinMax(input_data, output_data, reinterpret_cast<const int *>(shape), param); | |||||
| return RET_OK; | |||||
| } | |||||
| void ArgMinMaxBaseCPUKernel::FreeTmpMemory() { | |||||
| auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_); | |||||
| if (param->arg_elements_ == nullptr) { | |||||
| return; | |||||
| } | |||||
| if (data_from_allocator_) { | |||||
| context_->allocator->Free(param->arg_elements_); | |||||
| } else { | |||||
| free(param->arg_elements_); | |||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| if (param->topk_ > 1 || param->keep_dims_) { | |||||
| param->arg_elements_ = | |||||
| reinterpret_cast<ArgElement *>(context_->allocator->Malloc(sizeof(ArgElement) * shape[param->axis_])); | |||||
| if (param->arg_elements_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc memroy fail!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | } | ||||
| ArgMinMax(input_data, output_data, reinterpret_cast<const int *>(shape), param); | |||||
| context_->allocator->Free(param->arg_elements_); | |||||
| param->arg_elements_ = nullptr; | param->arg_elements_ = nullptr; | ||||
| return RET_OK; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArgMinMaxInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| @@ -26,20 +26,15 @@ class ArgMinMaxBaseCPUKernel : public LiteKernel { | |||||
| ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ArgMinMaxBaseCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive), data_from_allocator_(false) {} | |||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | |||||
| virtual ~ArgMinMaxBaseCPUKernel() { FreeTmpMemory(); } | |||||
| virtual ~ArgMinMaxBaseCPUKernel() = default; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| int Run() override; | int Run() override; | ||||
| void FreeTmpMemory(); | |||||
| private: | private: | ||||
| bool data_from_allocator_; | |||||
| }; | }; | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -43,7 +43,6 @@ int ArgMinMaxCPUKernel::Init() { | |||||
| } | } | ||||
| int ArgMinMaxCPUKernel::ReSize() { | int ArgMinMaxCPUKernel::ReSize() { | ||||
| ArgMinMaxBaseCPUKernel::FreeTmpMemory(); | |||||
| return ArgMinMaxBaseCPUKernel::ReSize(); | return ArgMinMaxBaseCPUKernel::ReSize(); | ||||
| } | } | ||||
| @@ -29,19 +29,6 @@ using mindspore::lite::RET_OK; | |||||
| using mindspore::schema::PrimitiveType_Eltwise; | using mindspore::schema::PrimitiveType_Eltwise; | ||||
| namespace mindspore::kernel { | namespace mindspore::kernel { | ||||
| void ArithmeticCPUKernel::FreeTileData() { | |||||
| if (tile_data0_ != nullptr) { | |||||
| delete[](tile_data0_); | |||||
| tile_data0_ = nullptr; | |||||
| } | |||||
| if (tile_data1_ != nullptr) { | |||||
| delete[](tile_data1_); | |||||
| tile_data1_ = nullptr; | |||||
| } | |||||
| } | |||||
| ArithmeticCPUKernel::~ArithmeticCPUKernel() { FreeTileData(); } | |||||
| int ArithmeticCPUKernel::Init() { | int ArithmeticCPUKernel::Init() { | ||||
| if (!InferShapeDone()) { | if (!InferShapeDone()) { | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -50,7 +37,6 @@ int ArithmeticCPUKernel::Init() { | |||||
| } | } | ||||
| int ArithmeticCPUKernel::ReSize() { | int ArithmeticCPUKernel::ReSize() { | ||||
| FreeTileData(); | |||||
| arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum(); | arithmeticParameter_->in_elements_num0_ = in_tensors_[0]->ElementsNum(); | ||||
| arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum(); | arithmeticParameter_->in_elements_num1_ = in_tensors_[1]->ElementsNum(); | ||||
| arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum(); | arithmeticParameter_->out_elements_num_ = out_tensors_[0]->ElementsNum(); | ||||
| @@ -75,12 +61,6 @@ int ArithmeticCPUKernel::ReSize() { | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| if (arithmeticParameter_->broadcasting_) { | |||||
| tile_data0_ = new float[arithmeticParameter_->out_elements_num_]; | |||||
| tile_data1_ = new float[arithmeticParameter_->out_elements_num_]; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -144,14 +124,27 @@ int ArithmeticCPUKernel::Run() { | |||||
| if (arithmeticParameter_->broadcasting_) { | if (arithmeticParameter_->broadcasting_) { | ||||
| auto input_data0 = reinterpret_cast<float *>(in_tensors_[0]->Data()); | auto input_data0 = reinterpret_cast<float *>(in_tensors_[0]->Data()); | ||||
| auto input_data1 = reinterpret_cast<float *>(in_tensors_[1]->Data()); | auto input_data1 = reinterpret_cast<float *>(in_tensors_[1]->Data()); | ||||
| auto length = arithmeticParameter_->out_elements_num_ * sizeof(float); | |||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| tile_data0_ = reinterpret_cast<float *>(context_->allocator->Malloc(length)); | |||||
| tile_data1_ = reinterpret_cast<float *>(context_->allocator->Malloc(length)); | |||||
| if (tile_data0_ == nullptr || tile_data1_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(tile_data0_); | |||||
| context_->allocator->Free(tile_data1_); | |||||
| return RET_ERROR; | |||||
| } | |||||
| TileDimensions(input_data0, input_data1, tile_data0_, tile_data1_, arithmeticParameter_); | TileDimensions(input_data0, input_data1, tile_data0_, tile_data1_, arithmeticParameter_); | ||||
| } | } | ||||
| int error_code = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Arithmetic function error error_code[" << error_code << "]"; | |||||
| return RET_ERROR; | |||||
| ret = LiteBackendParallelLaunch(ArithmeticsRun, this, thread_count_); | |||||
| if (arithmeticParameter_->broadcasting_) { | |||||
| context_->allocator->Free(tile_data0_); | |||||
| context_->allocator->Free(tile_data1_); | |||||
| } | } | ||||
| return RET_OK; | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Arithmetic function error error_code[" << ret << "]"; | |||||
| } | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArithmeticFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| @@ -166,7 +166,7 @@ class ArithmeticCPUKernel : public LiteKernel { | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| ~ArithmeticCPUKernel() override; | |||||
| ~ArithmeticCPUKernel() = default; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -174,7 +174,6 @@ class ArithmeticCPUKernel : public LiteKernel { | |||||
| int DoArithmetic(int task_id); | int DoArithmetic(int task_id); | ||||
| private: | private: | ||||
| void FreeTileData(); | |||||
| int thread_count_; | int thread_count_; | ||||
| float *tile_data0_ = nullptr; | float *tile_data0_ = nullptr; | ||||
| float *tile_data1_ = nullptr; | float *tile_data1_ = nullptr; | ||||
| @@ -51,11 +51,18 @@ int BiasCPUKernel::Run() { | |||||
| auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->Data()); | auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->Data()); | ||||
| auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | auto out = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | ||||
| size_t data_size = in_tensors_.at(0)->ElementsNum(); | size_t data_size = in_tensors_.at(0)->ElementsNum(); | ||||
| auto tile_in = new float[data_size]; | |||||
| auto tile_bias = new float[data_size]; | |||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| float *tile_in = reinterpret_cast<float *>(context_->allocator->Malloc(data_size * sizeof(float))); | |||||
| float *tile_bias = reinterpret_cast<float *>(context_->allocator->Malloc(data_size * sizeof(float))); | |||||
| if (tile_in == nullptr || tile_bias == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(tile_in); | |||||
| context_->allocator->Free(tile_bias); | |||||
| return RET_ERROR; | |||||
| } | |||||
| BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_); | BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_); | ||||
| delete[] tile_in; | |||||
| delete[] tile_bias; | |||||
| context_->allocator->Free(tile_in); | |||||
| context_->allocator->Free(tile_bias); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -49,40 +49,6 @@ int EmbeddingLookupCPUKernel::ReSize() { | |||||
| embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0]; | embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0]; | ||||
| } | } | ||||
| if (input_addr_ != nullptr) { | |||||
| free(input_addr_); | |||||
| } | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc( | |||||
| sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); | |||||
| } else { | |||||
| input_addr_ = reinterpret_cast<float *>( | |||||
| malloc(sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); | |||||
| } | |||||
| if (input_addr_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| if (embedding_lookup_parameter_->is_regulated_ != nullptr) { | |||||
| free(embedding_lookup_parameter_->is_regulated_); | |||||
| } | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| embedding_lookup_parameter_->is_regulated_ = | |||||
| reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); | |||||
| } else { | |||||
| embedding_lookup_parameter_->is_regulated_ = | |||||
| reinterpret_cast<bool *>(malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); | |||||
| } | |||||
| if (embedding_lookup_parameter_->is_regulated_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Malloc buffer failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) { | |||||
| embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -111,6 +77,22 @@ int EmbeddingLookupCPUKernel::Run() { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | MS_LOG(ERROR) << "Prepare fail!ret: " << prepare_ret; | ||||
| return prepare_ret; | return prepare_ret; | ||||
| } | } | ||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc( | |||||
| sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_)); | |||||
| embedding_lookup_parameter_->is_regulated_ = | |||||
| reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_)); | |||||
| if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(input_addr_); | |||||
| context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); | |||||
| return RET_ERROR; | |||||
| } | |||||
| for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) { | |||||
| embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0; | |||||
| } | |||||
| int dest_loc = 0; | int dest_loc = 0; | ||||
| for (int i = 0; i < in_tensors_.size() - 1; i++) { | for (int i = 0; i < in_tensors_.size() - 1; i++) { | ||||
| auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data()); | auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->Data()); | ||||
| @@ -121,11 +103,12 @@ int EmbeddingLookupCPUKernel::Run() { | |||||
| ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->Data()); | ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->Data()); | ||||
| auto ret = LiteBackendParallelLaunch(EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); | auto ret = LiteBackendParallelLaunch(EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num); | ||||
| context_->allocator->Free(input_addr_); | |||||
| context_->allocator->Free(embedding_lookup_parameter_->is_regulated_); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; | MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| @@ -34,23 +34,13 @@ int TopKCPUKernel::Init() { | |||||
| } | } | ||||
| int TopKCPUKernel::ReSize() { | int TopKCPUKernel::ReSize() { | ||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||||
| lite::tensor::Tensor *input = in_tensors_.at(0); | lite::tensor::Tensor *input = in_tensors_.at(0); | ||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||||
| parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | parameter->last_dim_size_ = input->shape()[input->shape().size() - 1]; | ||||
| parameter->loop_num_ = 1; | parameter->loop_num_ = 1; | ||||
| for (int i = 0; i < input->shape().size() - 1; ++i) { | for (int i = 0; i < input->shape().size() - 1; ++i) { | ||||
| parameter->loop_num_ *= input->shape()[i]; | parameter->loop_num_ *= input->shape()[i]; | ||||
| } | } | ||||
| if (parameter->topk_node_list_ != nullptr) { | |||||
| free(parameter->topk_node_list_); | |||||
| parameter->topk_node_list_ = nullptr; | |||||
| } | |||||
| parameter->topk_node_list_ = malloc(sizeof(TopkNode) * parameter->last_dim_size_); | |||||
| if (parameter->topk_node_list_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc fail."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -64,7 +54,15 @@ int TopKCPUKernel::Run() { | |||||
| auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->Data()); | ||||
| auto output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data()); | auto output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data()); | ||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||||
| parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNode) * parameter->last_dim_size_); | |||||
| if (parameter->topk_node_list_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| Topk(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_)); | Topk(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_)); | ||||
| context_->allocator->Free(parameter->topk_node_list_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -47,32 +47,6 @@ int ArithmeticsInt8Launch(int thread_id, LiteParallelGroupEnv *penv, void *cdata | |||||
| } | } | ||||
| } // namespace | } // namespace | ||||
| void ArithmeticInt8CPUKernel::FreeTileData() { | |||||
| auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||||
| if (!param->broadcasting_) { | |||||
| return; | |||||
| } | |||||
| if (context_->allocator != nullptr) { | |||||
| if (tile_data0_ != nullptr) { | |||||
| context_->allocator->Free(tile_data0_); | |||||
| } | |||||
| if (tile_data1_ != nullptr) { | |||||
| context_->allocator->Free(tile_data1_); | |||||
| } | |||||
| } else { | |||||
| if (tile_data0_ != nullptr) { | |||||
| free(tile_data0_); | |||||
| } | |||||
| if (tile_data1_ != nullptr) { | |||||
| free(tile_data1_); | |||||
| } | |||||
| } | |||||
| tile_data0_ = nullptr; | |||||
| tile_data1_ = nullptr; | |||||
| } | |||||
| ArithmeticInt8CPUKernel::~ArithmeticInt8CPUKernel() { FreeTileData(); } | |||||
| int ArithmeticInt8CPUKernel::Init() { | int ArithmeticInt8CPUKernel::Init() { | ||||
| switch (op_parameter_->type_) { | switch (op_parameter_->type_) { | ||||
| case PrimitiveType_Equal: | case PrimitiveType_Equal: | ||||
| @@ -121,21 +95,6 @@ int ArithmeticInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int ArithmeticInt8CPUKernel::ReSize() { | int ArithmeticInt8CPUKernel::ReSize() { | ||||
| FreeTileData(); | |||||
| auto data_size = out_tensors_[0]->Size(); | |||||
| auto param = reinterpret_cast<ArithmeticParameter *>(op_parameter_); | |||||
| if (param->broadcasting_) { | |||||
| if (context_->allocator != nullptr) { | |||||
| tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size)); | |||||
| tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(data_size)); | |||||
| } else { | |||||
| tile_data0_ = reinterpret_cast<int8_t *>(malloc(data_size)); | |||||
| tile_data1_ = reinterpret_cast<int8_t *>(malloc(data_size)); | |||||
| } | |||||
| } else { | |||||
| tile_data0_ = nullptr; | |||||
| tile_data1_ = nullptr; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -182,14 +141,25 @@ int ArithmeticInt8CPUKernel::Run() { | |||||
| if (param->broadcasting_) { | if (param->broadcasting_) { | ||||
| auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->Data()); | auto input_data0 = reinterpret_cast<int8_t *>(in_tensors_[0]->Data()); | ||||
| auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->Data()); | auto input_data1 = reinterpret_cast<int8_t *>(in_tensors_[1]->Data()); | ||||
| tile_data0_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(out_tensors_[0]->Size())); | |||||
| tile_data1_ = reinterpret_cast<int8_t *>(context_->allocator->Malloc(out_tensors_[0]->Size())); | |||||
| if (tile_data0_ == nullptr || tile_data1_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(tile_data0_); | |||||
| context_->allocator->Free(tile_data1_); | |||||
| return RET_ERROR; | |||||
| } | |||||
| TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); | TileDimensionsInt8(input_data0, input_data1, tile_data0_, tile_data1_, param); | ||||
| } | } | ||||
| int error_code = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << error_code; | |||||
| return RET_ERROR; | |||||
| ret = LiteBackendParallelLaunch(ArithmeticsInt8Launch, this, op_parameter_->thread_num_); | |||||
| if (param->broadcasting_) { | |||||
| context_->allocator->Free(tile_data0_); | |||||
| context_->allocator->Free(tile_data1_); | |||||
| } | } | ||||
| return RET_OK; | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Arithmetic launch function fail! ret: " << ret; | |||||
| } | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuArithmeticInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| @@ -32,7 +32,7 @@ class ArithmeticInt8CPUKernel : public LiteKernel { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~ArithmeticInt8CPUKernel(); | |||||
| ~ArithmeticInt8CPUKernel() = default; | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -40,7 +40,6 @@ class ArithmeticInt8CPUKernel : public LiteKernel { | |||||
| int DoArithmetic(int thread_id); | int DoArithmetic(int thread_id); | ||||
| private: | private: | ||||
| void FreeTileData(); | |||||
| int8_t *tile_data0_; | int8_t *tile_data0_; | ||||
| int8_t *tile_data1_; | int8_t *tile_data1_; | ||||
| ArithmeticRunInt8 arithmetic_run_; | ArithmeticRunInt8 arithmetic_run_; | ||||
| @@ -59,49 +59,6 @@ int DivInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int DivInt8CPUKernel::ReSize() { | int DivInt8CPUKernel::ReSize() { | ||||
| if (broadcast_) { | |||||
| if (tile0_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile0_data_); | |||||
| } else { | |||||
| free(tile0_data_); | |||||
| } | |||||
| } | |||||
| if (tile1_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile1_data_); | |||||
| } else { | |||||
| free(tile1_data_); | |||||
| } | |||||
| } | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| } else { | |||||
| tile0_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); | |||||
| } | |||||
| if (tile0_data_ == nullptr || tile1_data_ == nullptr) { | |||||
| if (tile0_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile0_data_); | |||||
| } else { | |||||
| free(tile0_data_); | |||||
| } | |||||
| } | |||||
| if (tile1_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile1_data_); | |||||
| } else { | |||||
| free(tile1_data_); | |||||
| } | |||||
| } | |||||
| MS_LOG(ERROR) << "malloc memroy fail!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -154,12 +111,23 @@ int DivInt8CPUKernel::Run() { | |||||
| tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); | tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); | ||||
| tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); | tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); | ||||
| } | } | ||||
| tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| if (tile0_data_ == nullptr || tile1_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(tile0_data_); | |||||
| context_->allocator->Free(tile1_data_); | |||||
| return RET_ERROR; | |||||
| } | |||||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()), | TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()), | ||||
| static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_), | static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_), | ||||
| reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | ||||
| } | } | ||||
| ret = LiteBackendParallelLaunch(DivInt8Run, this, op_parameter_->thread_num_); | ret = LiteBackendParallelLaunch(DivInt8Run, this, op_parameter_->thread_num_); | ||||
| if (broadcast_) { | |||||
| context_->allocator->Free(tile0_data_); | |||||
| context_->allocator->Free(tile1_data_); | |||||
| } | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; | MS_LOG(ERROR) << "DivInt8Run function error error_code[" << ret << "]"; | ||||
| } | } | ||||
| @@ -62,30 +62,8 @@ int SoftmaxInt8CPUKernel::Init() { | |||||
| return ReSize(); | return ReSize(); | ||||
| } | } | ||||
| void SoftmaxInt8CPUKernel::FreeTmpBuffer() { | |||||
| if (exp_data_ != nullptr) { | |||||
| free(exp_data_); | |||||
| exp_data_ = nullptr; | |||||
| } | |||||
| if (sum_data_ != nullptr) { | |||||
| free(sum_data_); | |||||
| sum_data_ = nullptr; | |||||
| } | |||||
| } | |||||
| int SoftmaxInt8CPUKernel::ReSize() { | int SoftmaxInt8CPUKernel::ReSize() { | ||||
| auto ret = SoftmaxBaseCPUKernel::ReSize(); | |||||
| if (ret != RET_OK) { | |||||
| return ret; | |||||
| } | |||||
| FreeTmpBuffer(); | |||||
| exp_data_ = reinterpret_cast<int *>(malloc(softmax_param_->element_size_ * sizeof(int))); | |||||
| int inner_size = 1; | |||||
| for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) { | |||||
| inner_size *= softmax_param_->input_shape_[i]; | |||||
| } | |||||
| sum_data_ = reinterpret_cast<int *>(malloc(inner_size * sizeof(int))); | |||||
| return RET_OK; | |||||
| return SoftmaxBaseCPUKernel::ReSize(); | |||||
| } | } | ||||
| int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) { | int SoftmaxInt8CPUKernel::DoSoftmax(int task_id) { | ||||
| @@ -132,12 +110,24 @@ int SoftmaxInt8CPUKernel::Run() { | |||||
| MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | MS_LOG(ERROR) << "Prepare fail!ret: " << ret; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| int error_code = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_); | |||||
| if (error_code != RET_OK) { | |||||
| MS_LOG(ERROR) << "Softmax function error error_code[" << error_code << "]"; | |||||
| exp_data_ = reinterpret_cast<int *>(context_->allocator->Malloc(softmax_param_->element_size_ * sizeof(int))); | |||||
| int inner_size = 1; | |||||
| for (int i = softmax_param_->axis_ + 1; i < softmax_param_->n_dim_; i++) { | |||||
| inner_size *= softmax_param_->input_shape_[i]; | |||||
| } | |||||
| sum_data_ = reinterpret_cast<int *>(context_->allocator->Malloc(inner_size * sizeof(int))); | |||||
| if (exp_data_ == nullptr || sum_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| context_->allocator->Free(exp_data_); | |||||
| context_->allocator->Free(sum_data_); | |||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| return RET_OK; | |||||
| ret = LiteBackendParallelLaunch(SoftmaxRun, this, thread_count_); | |||||
| context_->allocator->Free(exp_data_); | |||||
| context_->allocator->Free(sum_data_); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Softmax function error error_code[" << ret << "]"; | |||||
| } | |||||
| return ret; | |||||
| } | } | ||||
| } // namespace mindspore::kernel | } // namespace mindspore::kernel | ||||
| @@ -28,7 +28,7 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { | |||||
| const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx, | ||||
| const mindspore::lite::PrimitiveC *primitive) | const mindspore::lite::PrimitiveC *primitive) | ||||
| : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | : SoftmaxBaseCPUKernel(parameter, inputs, outputs, ctx, primitive) {} | ||||
| ~SoftmaxInt8CPUKernel() { FreeTmpBuffer(); } | |||||
| ~SoftmaxInt8CPUKernel() {} | |||||
| int Init() override; | int Init() override; | ||||
| int ReSize() override; | int ReSize() override; | ||||
| @@ -36,7 +36,6 @@ class SoftmaxInt8CPUKernel : public SoftmaxBaseCPUKernel { | |||||
| int DoSoftmax(int task_id); | int DoSoftmax(int task_id); | ||||
| private: | private: | ||||
| void FreeTmpBuffer(); | |||||
| int *sum_data_ = nullptr; | int *sum_data_ = nullptr; | ||||
| int *exp_data_ = nullptr; | int *exp_data_ = nullptr; | ||||
| SoftmaxQuantArg quant_params_; | SoftmaxQuantArg quant_params_; | ||||
| @@ -81,35 +81,6 @@ int SubInt8CPUKernel::Init() { | |||||
| } | } | ||||
| int SubInt8CPUKernel::ReSize() { | int SubInt8CPUKernel::ReSize() { | ||||
| if (broadcast_) { | |||||
| if (tile0_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile0_data_); | |||||
| } else { | |||||
| free(tile0_data_); | |||||
| } | |||||
| } | |||||
| if (tile1_data_ != nullptr) { | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| context_->allocator->Free(tile1_data_); | |||||
| } else { | |||||
| free(tile1_data_); | |||||
| } | |||||
| } | |||||
| if (context_ != nullptr && context_->allocator != nullptr) { | |||||
| tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| } else { | |||||
| tile0_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(malloc(sizeof(int8_t) * out_tensors_.at(0)->Size())); | |||||
| } | |||||
| if (tile0_data_ == nullptr || tile1_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc memroy fail!"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -164,17 +135,27 @@ int SubInt8CPUKernel::Run() { | |||||
| tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); | tile_para.in_shape1_[i] = in_tensors_.at(1)->DimensionSize(i); | ||||
| tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); | tile_para.out_shape_[i] = out_tensors_.at(0)->DimensionSize(i); | ||||
| } | } | ||||
| tile0_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| tile1_data_ = static_cast<int8_t *>(context_->allocator->Malloc(out_tensors_.at(0)->Size())); | |||||
| if (tile0_data_ == nullptr || tile1_data_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc memroy fail!"; | |||||
| context_->allocator->Free(tile0_data_); | |||||
| context_->allocator->Free(tile1_data_); | |||||
| return RET_ERROR; | |||||
| } | |||||
| TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()), | TileDimensionsUint8(static_cast<uint8_t *>(in_tensors_.at(0)->Data()), | ||||
| static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_), | static_cast<uint8_t *>(in_tensors_.at(1)->Data()), reinterpret_cast<uint8_t *>(tile0_data_), | ||||
| reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | reinterpret_cast<uint8_t *>(tile1_data_), &tile_para); | ||||
| } | } | ||||
| ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_); | ret = LiteBackendParallelLaunch(SubInt8Run, this, op_parameter_->thread_num_); | ||||
| if (broadcast_) { | |||||
| context_->allocator->Free(tile0_data_); | |||||
| context_->allocator->Free(tile1_data_); | |||||
| } | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]"; | MS_LOG(ERROR) << "SubInt8Run function error error_code[" << ret << "]"; | ||||
| return RET_ERROR; | |||||
| } | } | ||||
| return RET_OK; | |||||
| return ret; | |||||
| } | } | ||||
| kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | kernel::LiteKernel *CpuSubInt8KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, | ||||
| @@ -43,12 +43,6 @@ int TopKInt8CPUKernel::ReSize() { | |||||
| for (int i = 0; i < input->shape().size() - 1; ++i) { | for (int i = 0; i < input->shape().size() - 1; ++i) { | ||||
| parameter->loop_num_ *= input->shape()[i]; | parameter->loop_num_ *= input->shape()[i]; | ||||
| } | } | ||||
| parameter->topk_node_list_ = malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_); | |||||
| if (parameter->topk_node_list_ == nullptr) { | |||||
| MS_LOG(ERROR) << "malloc fail."; | |||||
| return RET_ERROR; | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -62,7 +56,15 @@ int TopKInt8CPUKernel::Run() { | |||||
| int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data()); | int8_t *output_data = reinterpret_cast<int8_t *>(out_tensors_.at(0)->Data()); | ||||
| int32_t *output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data()); | int32_t *output_index = reinterpret_cast<int32_t *>(out_tensors_.at(1)->Data()); | ||||
| MS_ASSERT(context_->allocator != nullptr); | |||||
| TopkParameter *parameter = reinterpret_cast<TopkParameter *>(op_parameter_); | |||||
| parameter->topk_node_list_ = context_->allocator->Malloc(sizeof(TopkNodeInt8) * parameter->last_dim_size_); | |||||
| if (parameter->topk_node_list_ == nullptr) { | |||||
| MS_LOG(ERROR) << "Memory allocation failed"; | |||||
| return RET_ERROR; | |||||
| } | |||||
| TopkInt8(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_)); | TopkInt8(input_data, output_data, output_index, reinterpret_cast<TopkParameter *>(op_parameter_)); | ||||
| context_->allocator->Free(parameter->topk_node_list_); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||