Merge pull request !7795 from hangq/master
tags/v1.1.0
@@ -16,6 +16,7 @@
 #include "src/lite_kernel.h"
 #include <algorithm>
+#include <queue>
 #include "src/tensor.h"
 namespace mindspore::kernel {
@@ -120,19 +121,19 @@ std::string LiteKernel::ToString() const {
   std::ostringstream oss;
   oss << "LiteKernel: " << this->name_;
   oss << ", Type: " << this->type_str();
-  oss << std::endl << this->in_tensors_.size() << " InputTensors:";
+  oss << ", " << this->in_tensors_.size() << " InputTensors:";
   for (auto tensor : in_tensors_) {
-    oss << " " << tensor << ":" << tensor->ToString();
+    oss << " " << tensor;
   }
-  oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
+  oss << ", " << this->out_tensors_.size() << " OutputTensors:";
   for (auto tensor : out_tensors_) {
-    oss << " " << tensor << ":" << tensor->ToString();
+    oss << " " << tensor;
   }
-  oss << std::endl << this->in_kernels_.size() << " InputKernels:";
+  oss << ", " << this->in_kernels_.size() << " InputKernels:";
   for (auto in_kernel : in_kernels_) {
     oss << " " << in_kernel->name_;
   }
-  oss << std::endl << this->out_kernels_.size() << " OutputKernels:";
+  oss << ", " << this->out_kernels_.size() << " OutputKernels:";
   for (auto out_kernel : out_kernels_) {
     oss << " " << out_kernel->name_;
   }
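
With commas in place of std::endl and the per-tensor ToString() calls dropped, each kernel now prints as one compact line. For a hypothetical Conv2D kernel with two inputs and one output, the result would look roughly like this (kernel names and addresses are made up; tensors print as raw pointers):

LiteKernel: Conv2D_1, Type: Conv2D, 2 InputTensors: 0x55d1a2c010 0x55d1a2c0d0, 1 OutputTensors: 0x55d1a2c190, 1 InputKernels: Pad_0, 1 OutputKernels: Relu_2
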
@@ -239,6 +240,42 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec
   return output_tensors;
 }
 
+int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels) {
+  auto old_kernels = *kernels;
+  kernels->clear();
+  std::queue<kernel::LiteKernel *> kernel_queue;
+  for (auto kernel : old_kernels) {
+    if (kernel->in_kernels().empty()) {
+      kernel_queue.push(kernel);
+      kernels->emplace_back(kernel);
+    }
+  }
+  while (!kernel_queue.empty()) {
+    auto cur_kernel = kernel_queue.front();
+    kernel_queue.pop();
+    MS_ASSERT(cur_kernel != nullptr);
+    auto next_kernels = cur_kernel->out_kernels();
+    for (auto next_kernel : next_kernels) {
+      auto in_kernels = next_kernel->in_kernels();
+      if (lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(next_kernel))) {
+        MS_LOG(ERROR) << "TopologicalSortKernels failed, loop exist";
+        return RET_ERROR;
+      }
+      if (std::all_of(in_kernels.begin(), in_kernels.end(), [&](const kernel::LiteKernel *in_kernel) {
+            return lite::IsContain(*kernels, const_cast<kernel::LiteKernel *>(in_kernel));
+          })) {
+        kernel_queue.push(next_kernel);
+        kernels->emplace_back(next_kernel);
+      }
+    }
+  }
+  if (kernels->size() != old_kernels.size()) {
+    MS_LOG(ERROR) << "TopologicalSortKernels failed, kernels size before sort: " << old_kernels.size()
+                  << ", kernels size after sort: " << kernels->size();
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
 void LiteKernelUtil::InitIOKernels(std::vector<kernel::LiteKernel *> &kernels) {
   for (auto *kernel : kernels) {
     // clean io kernels
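
The new TopologicalSortKernels is a Kahn-style BFS: seed the queue with kernels that have no input kernels, emit a kernel only once all of its inputs have been emitted, and treat any shortfall in the final count as evidence of a cycle. Below is a minimal, self-contained sketch of the same idea, with a hypothetical Node struct standing in for LiteKernel and a linear std::find standing in for lite::IsContain. Unlike the committed code, it silently skips successors already scheduled through another path instead of reporting a loop:

#include <algorithm>
#include <cstdio>
#include <queue>
#include <vector>

// Hypothetical stand-in for LiteKernel: a node knows its predecessors and successors.
struct Node {
  const char *name;
  std::vector<Node *> in;
  std::vector<Node *> out;
};

// Kahn-style BFS. Fills *sorted and returns true on success; returns false
// when some node never becomes ready, which indicates a cycle.
bool TopologicalSort(const std::vector<Node *> &nodes, std::vector<Node *> *sorted) {
  sorted->clear();
  std::queue<Node *> ready;
  for (Node *n : nodes) {  // seed: graph inputs have no predecessors
    if (n->in.empty()) {
      ready.push(n);
      sorted->push_back(n);
    }
  }
  auto emitted = [&](Node *n) {
    return std::find(sorted->begin(), sorted->end(), n) != sorted->end();
  };
  while (!ready.empty()) {
    Node *cur = ready.front();
    ready.pop();
    for (Node *next : cur->out) {
      if (emitted(next)) continue;  // already scheduled via another path
      // a successor becomes ready once every one of its inputs is emitted
      if (std::all_of(next->in.begin(), next->in.end(), emitted)) {
        ready.push(next);
        sorted->push_back(next);
      }
    }
  }
  return sorted->size() == nodes.size();  // shortfall => cycle or unreachable node
}

int main() {
  Node a{"a", {}, {}}, b{"b", {}, {}}, c{"c", {}, {}};
  a.out = {&b, &c};
  b.in = {&a};
  b.out = {&c};
  c.in = {&a, &b};
  std::vector<Node *> nodes{&c, &b, &a};  // deliberately out of order
  std::vector<Node *> sorted;
  if (TopologicalSort(nodes, &sorted)) {
    for (Node *n : sorted) std::printf("%s ", n->name);  // prints: a b c
  }
  return 0;
}
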
@@ -202,6 +202,8 @@ class LiteKernelUtil {
   static std::vector<lite::Tensor *> SubgraphOutputTensors(const std::vector<kernel::LiteKernel *> &kernels);
+  static int TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels);
   static void InitTensorRefCount(std::vector<kernel::LiteKernel *> &kernels);
   static int SetInput(LiteKernel &kernelMod, std::vector<lite::Tensor *> inputs);
@@ -38,17 +38,21 @@ int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors
   int ret = InferShape(model, tensors);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "op infer shape failed.";
-    return RET_ERROR;
+    return ret;
   }
-  ret = InitOp2Kernel(model, tensors, kernels);
+  ret = BuildKernels(model, tensors, kernels);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "init op to kernel failed.";
-    return RET_ERROR;
+    return ret;
   }
   kernel::LiteKernelUtil::InitIOKernels(*kernels);
-  ConstructSubGraphs(kernels);
+  ret = ConstructSubGraphs(kernels);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ConstructSubGraphs failed.";
+    return ret;
+  }
   kernel::LiteKernelUtil::InitIOKernels(*kernels);
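
Note the error-handling pattern in this hunk: each failing step now returns the callee's status code instead of collapsing it to RET_ERROR, so Schedule's caller can distinguish, say, an infer-shape failure from a kernel-build failure. A minimal sketch of the pattern, with hypothetical status values rather than the real errorcode.h constants:

enum Status { kOk = 0, kGenericError = -1, kInferError = -500 };  // hypothetical codes

Status InferShape() { return kInferError; }  // pretend this step fails

Status Schedule() {
  Status ret = InferShape();
  if (ret != kOk) {
    return ret;  // propagates kInferError; the old code returned kGenericError here
  }
  return kOk;
}
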
@@ -129,8 +133,8 @@ int Scheduler::InferShape(const lite::Model *model, std::vector<Tensor *> *tenso
   return RET_OK;
 }
 
-int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
-                             std::vector<kernel::LiteKernel *> *kernels) {
+int Scheduler::BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
+                            std::vector<kernel::LiteKernel *> *kernels) {
   MS_ASSERT(model != nullptr);
   MS_ASSERT(tensors != nullptr);
   uint32_t kernelCount = model->nodes_.size();
@@ -194,7 +198,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
    std::vector<kernel::LiteKernel *> sub_kernels;
    std::queue<kernel::LiteKernel *> kernel_queue;
    kernel_queue.emplace(head_kernel);
-   auto cur_sub_graph_type = this->GetKernelSubGraphType(head_kernel);
+   auto cur_sub_graph_type = mindspore::lite::Scheduler::GetKernelSubGraphType(head_kernel);
    while (!kernel_queue.empty()) {
      auto cur_kernel = kernel_queue.front();
      kernel_queue.pop();
@@ -202,7 +206,7 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) {
      sub_kernels.emplace_back(cur_kernel);
      auto post_kernels = cur_kernel->out_kernels();
      for (auto post_kernel : post_kernels) {
-       if (cur_sub_graph_type == this->GetKernelSubGraphType(post_kernel)) {
+       if (cur_sub_graph_type == mindspore::lite::Scheduler::GetKernelSubGraphType(post_kernel)) {
          auto post_kernel_inputs = post_kernel->in_kernels();
          if (std::all_of(post_kernel_inputs.begin(), post_kernel_inputs.end(),
                          [&](kernel::LiteKernel *kernel) { return is_kernel_sinked[kernel]; })) {
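
For context, ConstructSubGraphs (visible only in part here) grows each subgraph by BFS from a head kernel: a successor is sinked into the current subgraph only if it has the same subgraph type and all of its input kernels are already sinked. A self-contained sketch of that sinking rule, using a hypothetical Node struct rather than the real Scheduler/LiteKernel API:

#include <algorithm>
#include <map>
#include <queue>
#include <vector>

struct Node {
  int subgraph_type = 0;  // hypothetical stand-in for the kernel's subgraph type
  std::vector<Node *> in;
  std::vector<Node *> out;
};

// Collects the run of same-type nodes reachable from head, sinking a
// successor only once every one of its predecessors has been sinked.
std::vector<Node *> GrowSubgraph(Node *head, std::map<Node *, bool> *sinked) {
  std::vector<Node *> sub_kernels;
  std::queue<Node *> kernel_queue;
  kernel_queue.push(head);
  const int cur_type = head->subgraph_type;
  while (!kernel_queue.empty()) {
    Node *cur = kernel_queue.front();
    kernel_queue.pop();
    (*sinked)[cur] = true;  // mark on pop, as the real code does
    sub_kernels.push_back(cur);
    for (Node *next : cur->out) {
      if (next->subgraph_type != cur_type || (*sinked)[next]) continue;
      // sink a successor only when every one of its inputs is sinked
      if (std::all_of(next->in.begin(), next->in.end(),
                      [&](Node *n) { return (*sinked)[n]; })) {
        kernel_queue.push(next);
      }
    }
  }
  return sub_kernels;
}
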
@@ -37,8 +37,8 @@ class Scheduler {
   kernel::LiteKernel *ScheduleNode(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                                    const mindspore::lite::PrimitiveC *primitive, const Model::Node *cnode);
-  int InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
-                    std::vector<kernel::LiteKernel *> *kernels);
+  int BuildKernels(const lite::Model *model, std::vector<Tensor *> *tensors,
+                   std::vector<kernel::LiteKernel *> *kernels);
   static int InferShape(const lite::Model *model, std::vector<Tensor *> *tensors);
@@ -44,23 +44,23 @@ int SubGraphKernel::Prepare() {
 std::string SubGraphKernel::ToString() const {
   std::ostringstream oss;
   oss << "===============================================" << std::endl << "Subgraph type : " << this->subgraph_type_;
-  oss << std::endl << this->in_tensors_.size() << " InputTensors:";
+  oss << std::endl << this->in_tensors_.size() << " Subgraph inputTensors:";
   for (auto tensor : in_tensors_) {
-    oss << " " << tensor << ":" << tensor->ToString();
+    oss << " " << tensor;
   }
-  oss << std::endl << this->out_tensors_.size() << " OutputTensors:";
+  oss << std::endl << this->out_tensors_.size() << " Subgraph outputTensors:";
   for (auto tensor : out_tensors_) {
-    oss << " " << tensor << ":" << tensor->ToString();
+    oss << " " << tensor;
   }
-  oss << std::endl << "input kernels :";
+  oss << std::endl << "Subgraph input kernels :" << std::endl;
   for (auto kernel : this->in_kernels_) {
-    oss << " " << kernel->ToString();
+    oss << " " << kernel->ToString() << std::endl;
   }
-  oss << std::endl << "output kernels :";
+  oss << std::endl << "Subgraph output kernels :" << std::endl;
   for (auto kernel : this->out_kernels_) {
-    oss << " " << kernel->ToString();
+    oss << " " << kernel->ToString() << std::endl;
   }
-  oss << std::endl << nodes_.size() << " nodes :";
+  oss << std::endl << nodes_.size() << " nodes in subgraph :";
   for (auto kernel : this->nodes_) {
     oss << " " << kernel->name();
   }
@@ -178,36 +178,18 @@ int CpuFp16SubGraph::PreProcess() {
 }
 
 int CpuFp16SubGraph::PostProcess() {
-  auto fp16_to_fp32_cast_func = kernel::Float16CastUtil::GetInstance()->float16_to_float32_func_;
+  auto fp16_to_fp32_cast_func = Float16CastUtil::GetInstance()->float16_to_float32_func_;
   if (fp16_to_fp32_cast_func == nullptr) {
     MS_LOG(ERROR) << "Can not find cast fp16 to fp32 func";
     return RET_ERROR;
   }
   for (auto tensor : this->out_tensors_) {
     if (tensor->data_type() == kNumberTypeFloat16) {
-      void *float16_data = nullptr;
-      if (this->context_ != nullptr && this->context_->allocator != nullptr) {
-        float16_data = this->context_->allocator->Malloc(tensor->Size());
-      } else {
-        float16_data = malloc(tensor->Size());
-      }
-      if (float16_data == nullptr) {
-        MS_LOG(ERROR) << "malloc data failed";
-        return RET_ERROR;
-      }
-      memcpy(float16_data, tensor->data_c(), tensor->Size());
-      auto ret = tensor->FreeData();
-      if (RET_OK != ret) {
-        MS_LOG(ERROR) << "free data failed";
-        if (this->context_ != nullptr && this->context_->allocator != nullptr) {
-          this->context_->allocator->Free(float16_data);
-        } else {
-          free(float16_data);
-        }
-        return RET_ERROR;
-      }
+      auto float16_data = tensor->data_c();
+      MS_ASSERT(float16_data != nullptr);
+      tensor->set_data(nullptr);
       tensor->set_data_type(TypeId::kNumberTypeFloat32);
-      ret = tensor->MallocData();
+      auto ret = tensor->MallocData();
       if (RET_OK != ret) {
         MS_LOG(ERROR) << "malloc data failed";
         if (this->context_ != nullptr && this->context_->allocator != nullptr) {
@@ -217,9 +199,10 @@ int CpuFp16SubGraph::PostProcess() {
         }
         return RET_ERROR;
       }
+      MS_ASSERT(tensor->data_c() != nullptr);
       fp16_to_fp32_cast_func(float16_data, tensor->data_c(), tensor->ElementsNum());
-      if (this->context_ != nullptr && this->context_->allocator != nullptr) {
-        this->context_->allocator->Free(float16_data);
+      if (tensor->allocator() != nullptr) {
+        tensor->allocator()->Free(float16_data);
       } else {
         free(float16_data);
       }
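
The PostProcess rework replaces the old copy-out path (allocate a scratch buffer, memcpy the fp16 data into it, FreeData the tensor) with a pointer swap: detach the tensor's fp16 buffer, let MallocData allocate the fp32 buffer in place, run the cast, then free the detached fp16 buffer through the tensor's own allocator. A standalone sketch of that flow, with a minimal stand-in Tensor and a naive scalar cast; the real code resolves an optimized cast via Float16CastUtil and frees through tensor->allocator():

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Naive scalar fp16 -> fp32 cast (normals and inf/NaN; denormals flushed to 0).
// Stand-in for the optimized float16_to_float32_func_ the real code resolves.
static void Fp16ToFp32(const uint16_t *src, float *dst, int n) {
  for (int i = 0; i < n; ++i) {
    uint16_t h = src[i];
    uint32_t sign = static_cast<uint32_t>(h & 0x8000u) << 16;
    uint32_t exp = (h >> 10) & 0x1Fu;
    uint32_t mant = h & 0x3FFu;
    uint32_t bits;
    if (exp == 0) {
      bits = sign;                                        // +-0 (denormals flushed)
    } else if (exp == 31) {
      bits = sign | 0x7F800000u | (mant << 13);           // inf / NaN
    } else {
      bits = sign | ((exp + 112u) << 23) | (mant << 13);  // rebias exponent 15 -> 127
    }
    std::memcpy(&dst[i], &bits, sizeof(float));
  }
}

// Minimal stand-in for lite::Tensor, just enough to show the buffer swap.
struct Tensor {
  void *data = nullptr;
  int elements = 0;
  void *data_c() const { return data; }
  void set_data(void *d) { data = d; }
  int MallocData() { data = std::malloc(elements * sizeof(float)); return data ? 0 : -1; }
};

int PostProcess(Tensor *tensor) {
  void *float16_data = tensor->data_c();  // take ownership of the fp16 buffer
  tensor->set_data(nullptr);              // detach it so MallocData gets a fresh one
  if (tensor->MallocData() != 0) {        // fp32 buffer now lives in the tensor
    std::free(float16_data);
    return -1;
  }
  Fp16ToFp32(static_cast<const uint16_t *>(float16_data),
             static_cast<float *>(tensor->data_c()), tensor->elements);
  std::free(float16_data);  // the real code frees via tensor->allocator()
  return 0;
}

int main() {
  uint16_t half_one = 0x3C00;  // 1.0f in IEEE half precision
  Tensor t;
  t.elements = 1;
  t.data = std::malloc(sizeof(half_one));
  std::memcpy(t.data, &half_one, sizeof(half_one));
  if (PostProcess(&t) == 0) {
    std::printf("%f\n", static_cast<float *>(t.data_c())[0]);  // prints 1.000000
  }
  std::free(t.data);
  return 0;
}
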