From: @hangangqiang | Reviewed-by: | Signed-off-by: | tags/v1.1.0
| @@ -45,7 +45,7 @@ void LiteKernel::FreeWorkspace() { | |||||
| bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) { | bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) { | ||||
| return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { | return std::all_of(this->in_tensors().begin(), this->in_tensors().end(), [&](lite::Tensor *kernel_in_tensor) { | ||||
| if (IsContain(scope_tensors, kernel_in_tensor)) { | if (IsContain(scope_tensors, kernel_in_tensor)) { | ||||
| return (kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1); | |||||
| return (kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || kernel_in_tensor->ref_count() >= 1); | |||||
| } else { | } else { | ||||
| return true; | return true; | ||||
| } | } | ||||
| @@ -54,7 +54,7 @@ bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) { | |||||
| void LiteKernel::InitOutTensorInitRefCount() { | void LiteKernel::InitOutTensorInitRefCount() { | ||||
| for (auto *tensor : this->out_tensors_) { | for (auto *tensor : this->out_tensors_) { | ||||
| int init_ref_count = 0; | |||||
| size_t init_ref_count = 0; | |||||
| for (auto *post_kernel : this->out_kernels_) { | for (auto *post_kernel : this->out_kernels_) { | ||||
| init_ref_count += | init_ref_count += | ||||
| std::count_if(post_kernel->in_tensors_.begin(), post_kernel->in_tensors_.end(), | std::count_if(post_kernel->in_tensors_.begin(), post_kernel->in_tensors_.end(), | ||||
| @@ -81,7 +81,7 @@ int LiteKernel::DecOutTensorRefCount() { | |||||
| int LiteKernel::FreeInWorkTensor() const { | int LiteKernel::FreeInWorkTensor() const { | ||||
| for (auto &in_tensor : this->in_tensors_) { | for (auto &in_tensor : this->in_tensors_) { | ||||
| MS_ASSERT(in_tensor != nullptr); | MS_ASSERT(in_tensor != nullptr); | ||||
| if (in_tensor->IsConst()) { | |||||
| if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| MS_ASSERT(in_tensor->ref_count() > 0); | MS_ASSERT(in_tensor->ref_count() > 0); | ||||
| @@ -220,19 +220,18 @@ void LiteKernel::FindInoutKernels(const std::vector<kernel::LiteKernel *> &scope | |||||
| } | } | ||||
| } | } | ||||
| std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels( | |||||
| const std::vector<kernel::LiteKernel *> &kernels) { | |||||
| std::set<kernel::LiteKernel *> input_kernels; | |||||
| std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputNodes(const std::vector<kernel::LiteKernel *> &kernels) { | |||||
| std::set<kernel::LiteKernel *> input_nodes; | |||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| // a kernel with no pre-kernel is a graph input, so it must also be a subgraph input | // a kernel with no pre-kernel is a graph input, so it must also be a subgraph input | ||||
| if (kernel->in_kernels().empty() && !kernel->in_tensors().empty()) { | if (kernel->in_kernels().empty() && !kernel->in_tensors().empty()) { | ||||
| input_kernels.insert(kernel); | |||||
| input_nodes.insert(kernel); | |||||
| continue; | continue; | ||||
| } | } | ||||
| auto all_input_tensors = kernel->in_tensors(); | auto all_input_tensors = kernel->in_tensors(); | ||||
| // remove all const tensors from the input tensors | // remove all const tensors from the input tensors | ||||
| for (auto iter = all_input_tensors.begin(); iter != all_input_tensors.end();) { | for (auto iter = all_input_tensors.begin(); iter != all_input_tensors.end();) { | ||||
| if ((*iter)->IsConst()) { | |||||
| if ((*iter)->IsConst() || (*iter)->IsGraphInput()) { | |||||
| iter = all_input_tensors.erase(iter); | iter = all_input_tensors.erase(iter); | ||||
| } else { | } else { | ||||
| iter++; | iter++; | ||||
| @@ -249,83 +248,76 @@ std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphInputKernels( | |||||
| } | } | ||||
| // if some input tensor does not come from a kernel in the subgraph | // if some input tensor does not come from a kernel in the subgraph | ||||
| if (!all_input_tensors.empty()) { | if (!all_input_tensors.empty()) { | ||||
| input_kernels.insert(kernel); | |||||
| input_nodes.insert(kernel); | |||||
| } | } | ||||
| } | } | ||||
| std::vector<kernel::LiteKernel *> result; | std::vector<kernel::LiteKernel *> result; | ||||
| result.insert(result.end(), input_kernels.begin(), input_kernels.end()); | |||||
| result.insert(result.end(), input_nodes.begin(), input_nodes.end()); | |||||
| return result; | return result; | ||||
| } | } | ||||
| std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphOutputKernels( | |||||
| std::vector<kernel::LiteKernel *> LiteKernelUtil::SubgraphOutputNodes( | |||||
| const std::vector<kernel::LiteKernel *> &kernels) { | const std::vector<kernel::LiteKernel *> &kernels) { | ||||
| std::set<kernel::LiteKernel *> output_kernels; | |||||
| std::set<kernel::LiteKernel *> output_nodes; | |||||
| // a kernel with no post-kernel is a graph output, so it must also be a subgraph output | // a kernel with no post-kernel is a graph output, so it must also be a subgraph output | ||||
| for (const auto &kernel : kernels) { | for (const auto &kernel : kernels) { | ||||
| if (kernel->is_model_output() || (kernel->out_kernels().empty() && !kernel->out_tensors().empty())) { | if (kernel->is_model_output() || (kernel->out_kernels().empty() && !kernel->out_tensors().empty())) { | ||||
| output_kernels.insert(kernel); | |||||
| output_nodes.insert(kernel); | |||||
| continue; | continue; | ||||
| } | } | ||||
| for (const auto &output : kernel->out_kernels()) { | for (const auto &output : kernel->out_kernels()) { | ||||
| auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output); | auto out_kernel_in_graph = std::find(kernels.begin(), kernels.end(), output); | ||||
| if (out_kernel_in_graph == kernels.end()) { | if (out_kernel_in_graph == kernels.end()) { | ||||
| output_kernels.insert(kernel); | |||||
| output_nodes.insert(kernel); | |||||
| break; | break; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| std::vector<kernel::LiteKernel *> result; | std::vector<kernel::LiteKernel *> result; | ||||
| result.insert(result.end(), output_kernels.begin(), output_kernels.end()); | |||||
| result.insert(result.end(), output_nodes.begin(), output_nodes.end()); | |||||
| return result; | return result; | ||||
| } | } | ||||
| std::vector<lite::Tensor *> LiteKernelUtil::SubgraphInputTensors(const std::vector<kernel::LiteKernel *> &kernels) { | std::vector<lite::Tensor *> LiteKernelUtil::SubgraphInputTensors(const std::vector<kernel::LiteKernel *> &kernels) { | ||||
| std::vector<lite::Tensor *> input_tensors; | |||||
| std::vector<kernel::LiteKernel *> input_kernels = SubgraphInputKernels(kernels); | |||||
| for (const auto &input_kernel : input_kernels) { | |||||
| auto &outer_in_kernels = input_kernel->in_kernels(); | |||||
| auto &in_kernel_in_tensors = input_kernel->in_tensors(); | |||||
| if (outer_in_kernels.empty()) { | |||||
| for (auto &in_kernel_in_tensor : in_kernel_in_tensors) { | |||||
| if (!in_kernel_in_tensor->IsConst()) { | |||||
| if (!IsContain(input_tensors, in_kernel_in_tensor)) { | |||||
| input_tensors.push_back(in_kernel_in_tensor); | |||||
| } | |||||
| } | |||||
| std::set<lite::Tensor *> input_tensors; | |||||
| std::vector<kernel::LiteKernel *> input_nodes = SubgraphInputNodes(kernels); | |||||
| for (const auto &input_node : input_nodes) { | |||||
| auto &in_node_in_kernels = input_node->in_kernels(); | |||||
| auto &in_node_in_tensors = input_node->in_tensors(); | |||||
| for (auto &in_node_in_tensor : in_node_in_tensors) { | |||||
| if (in_node_in_tensor->IsGraphInput()) { | |||||
| input_tensors.insert(in_node_in_tensor); | |||||
| } | } | ||||
| continue; | |||||
| } | } | ||||
| for (auto outer_in_kernel : outer_in_kernels) { | |||||
| auto iter = std::find(kernels.begin(), kernels.end(), outer_in_kernel); | |||||
| for (auto in_node_in_kernel : in_node_in_kernels) { | |||||
| auto iter = std::find(kernels.begin(), kernels.end(), in_node_in_kernel); | |||||
| if (iter != kernels.end()) { | if (iter != kernels.end()) { | ||||
| continue; | continue; | ||||
| } | } | ||||
| auto &outer_in_kernel_out_tensors = outer_in_kernel->out_tensors(); | |||||
| for (auto in_kernel_in_tensor : in_kernel_in_tensors) { | |||||
| auto &outer_in_kernel_out_tensors = in_node_in_kernel->out_tensors(); | |||||
| for (auto in_node_in_tensor : in_node_in_tensors) { | |||||
| auto outer_in_kernel_out_tensors_iter = | auto outer_in_kernel_out_tensors_iter = | ||||
| std::find(outer_in_kernel_out_tensors.begin(), outer_in_kernel_out_tensors.end(), in_kernel_in_tensor); | |||||
| std::find(outer_in_kernel_out_tensors.begin(), outer_in_kernel_out_tensors.end(), in_node_in_tensor); | |||||
| if (outer_in_kernel_out_tensors_iter != outer_in_kernel_out_tensors.end()) { | if (outer_in_kernel_out_tensors_iter != outer_in_kernel_out_tensors.end()) { | ||||
| if (!IsContain(input_tensors, in_kernel_in_tensor)) { | |||||
| input_tensors.emplace_back(in_kernel_in_tensor); | |||||
| } | |||||
| input_tensors.insert(in_node_in_tensor); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return input_tensors; | |||||
| std::vector<lite::Tensor *> result; | |||||
| result.insert(result.end(), input_tensors.begin(), input_tensors.end()); | |||||
| return result; | |||||
| } | } | ||||
| std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vector<kernel::LiteKernel *> &kernels) { | std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vector<kernel::LiteKernel *> &kernels) { | ||||
| std::vector<lite::Tensor *> output_tensors; | |||||
| std::vector<kernel::LiteKernel *> output_kernels = SubgraphOutputKernels(kernels); | |||||
| for (const auto &output_kernel : output_kernels) { | |||||
| std::set<lite::Tensor *> output_tensors; | |||||
| std::vector<kernel::LiteKernel *> output_nodes = SubgraphOutputNodes(kernels); | |||||
| for (const auto &output_kernel : output_nodes) { | |||||
| auto &outer_out_kernels = output_kernel->out_kernels(); | auto &outer_out_kernels = output_kernel->out_kernels(); | ||||
| auto &out_kernel_out_tensors = output_kernel->out_tensors(); | auto &out_kernel_out_tensors = output_kernel->out_tensors(); | ||||
| if (outer_out_kernels.empty()) { | if (outer_out_kernels.empty()) { | ||||
| for (auto out_kernel_out_tensor : out_kernel_out_tensors) { | for (auto out_kernel_out_tensor : out_kernel_out_tensors) { | ||||
| if (!IsContain(output_tensors, out_kernel_out_tensor)) { | |||||
| output_tensors.push_back(out_kernel_out_tensor); | |||||
| } | |||||
| output_tensors.insert(out_kernel_out_tensor); | |||||
| } | } | ||||
| continue; | continue; | ||||
| } | } | ||||
| @@ -339,14 +331,14 @@ std::vector<lite::Tensor *> LiteKernelUtil::SubgraphOutputTensors(const std::vec | |||||
| auto outer_out_kernel_in_tensors_iter = | auto outer_out_kernel_in_tensors_iter = | ||||
| std::find(outer_out_kernel_in_tensors.begin(), outer_out_kernel_in_tensors.end(), out_kernel_out_tensor); | std::find(outer_out_kernel_in_tensors.begin(), outer_out_kernel_in_tensors.end(), out_kernel_out_tensor); | ||||
| if (outer_out_kernel_in_tensors_iter != outer_out_kernel_in_tensors.end()) { | if (outer_out_kernel_in_tensors_iter != outer_out_kernel_in_tensors.end()) { | ||||
| if (!IsContain(output_tensors, out_kernel_out_tensor)) { | |||||
| output_tensors.emplace_back(out_kernel_out_tensor); | |||||
| } | |||||
| output_tensors.insert(out_kernel_out_tensor); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| return output_tensors; | |||||
| std::vector<lite::Tensor *> result; | |||||
| result.insert(result.end(), output_tensors.begin(), output_tensors.end()); | |||||
| return result; | |||||
| } | } | ||||
| int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels) { | int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *kernels) { | ||||
| @@ -56,7 +56,6 @@ enum SubGraphType { kNotSubGraph = 0, kCpuFP32SubGraph, kCpuFP16SubGraph, kGpuSu | |||||
| class LiteKernel { | class LiteKernel { | ||||
| public: | public: | ||||
| LiteKernel() = default; | LiteKernel() = default; | ||||
| // parameter should be deleted or freed by caller, and should be deleted or freed after LiteKernel is deleted | |||||
| LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors, | LiteKernel(OpParameter *parameter, std::vector<lite::Tensor *> in_tensors, std::vector<lite::Tensor *> out_tensors, | ||||
| const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) | const lite::InnerContext *ctx, const mindspore::lite::PrimitiveC *primitive) | ||||
| : op_parameter_(parameter), | : op_parameter_(parameter), | ||||
| @@ -214,9 +213,9 @@ typedef LiteKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs, | |||||
| class LiteKernelUtil { | class LiteKernelUtil { | ||||
| public: | public: | ||||
| static std::vector<kernel::LiteKernel *> SubgraphInputKernels(const std::vector<kernel::LiteKernel *> &kernels); | |||||
| static std::vector<kernel::LiteKernel *> SubgraphInputNodes(const std::vector<kernel::LiteKernel *> &kernels); | |||||
| static std::vector<kernel::LiteKernel *> SubgraphOutputKernels(const std::vector<kernel::LiteKernel *> &kernels); | |||||
| static std::vector<kernel::LiteKernel *> SubgraphOutputNodes(const std::vector<kernel::LiteKernel *> &kernels); | |||||
| static std::vector<lite::Tensor *> SubgraphInputTensors(const std::vector<kernel::LiteKernel *> &kernels); | static std::vector<lite::Tensor *> SubgraphInputTensors(const std::vector<kernel::LiteKernel *> &kernels); | ||||
| @@ -148,9 +148,6 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { | |||||
| } else { | } else { | ||||
| dst_tensor = new (std::nothrow) Tensor(TypeId(src_tensor.dataType()), shape, src_tensor.format(), src_category); | dst_tensor = new (std::nothrow) Tensor(TypeId(src_tensor.dataType()), shape, src_tensor.format(), src_category); | ||||
| } | } | ||||
| if (dst_tensor == nullptr) { | |||||
| return nullptr; | |||||
| } | |||||
| return dst_tensor; | return dst_tensor; | ||||
| } | } | ||||
| @@ -158,6 +155,8 @@ int LiteSession::ConvertTensors(const lite::Model *model) { | |||||
| MS_ASSERT(model != nullptr); | MS_ASSERT(model != nullptr); | ||||
| copyed_tensor_idxes_.clear(); | copyed_tensor_idxes_.clear(); | ||||
| uint32_t tensor_count = model->all_tensors_.size(); | uint32_t tensor_count = model->all_tensors_.size(); | ||||
| MS_ASSERT(!model->sub_graphs_.empty()); | |||||
| auto model_input_indices = model->sub_graphs_.front()->input_indices_; | |||||
| for (uint32_t i = 0; i < tensor_count; ++i) { | for (uint32_t i = 0; i < tensor_count; ++i) { | ||||
| auto *src_tensor = model->all_tensors_[i]; | auto *src_tensor = model->all_tensors_[i]; | ||||
| if (src_tensor == nullptr) { | if (src_tensor == nullptr) { | ||||
| @@ -176,6 +175,9 @@ int LiteSession::ConvertTensors(const lite::Model *model) { | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| ConvertTensorsQuantParam(src_tensor, dst_tensor); | ConvertTensorsQuantParam(src_tensor, dst_tensor); | ||||
| if (IsContain(model_input_indices, i)) { | |||||
| dst_tensor->set_category(Tensor::GRAPH_INPUT); | |||||
| } | |||||
| this->tensors_.emplace_back(dst_tensor); | this->tensors_.emplace_back(dst_tensor); | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -329,6 +331,9 @@ void LiteSession::InitGraphInOutTensors(const lite::Model *model) { | |||||
| InitGraphOutputNodeMap(model); | InitGraphOutputNodeMap(model); | ||||
| InitGraphOutputTensorNames(model); | InitGraphOutputTensorNames(model); | ||||
| InitGraphOutputTensorMap(model); | InitGraphOutputTensorMap(model); | ||||
| for (auto *tensor : this->inputs_) { | |||||
| tensor->set_category(Tensor::Category::GRAPH_INPUT); | |||||
| } | |||||
| } | } | ||||
| int LiteSession::CompileGraph(Model *model) { | int LiteSession::CompileGraph(Model *model) { | ||||
| @@ -398,11 +403,6 @@ int LiteSession::PrepareKernels(Model *model) { | |||||
| // find in_kernels and out_kernels for subgraphs | // find in_kernels and out_kernels for subgraphs | ||||
| for (auto kernel : this->kernels_) { | for (auto kernel : this->kernels_) { | ||||
| kernel->FindInoutKernels(this->kernels_); | kernel->FindInoutKernels(this->kernels_); | ||||
| auto ret = kernel->Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; | |||||
| return ret; | |||||
| } | |||||
| auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel); | auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel); | ||||
| MS_ASSERT(sub_graph != nullptr); | MS_ASSERT(sub_graph != nullptr); | ||||
| auto kernel_in_subgraph = sub_graph->nodes(); | auto kernel_in_subgraph = sub_graph->nodes(); | ||||
| @@ -417,6 +417,13 @@ int LiteSession::PrepareKernels(Model *model) { | |||||
| kernel->InitOutTensorInitRefCount(); | kernel->InitOutTensorInitRefCount(); | ||||
| } | } | ||||
| AdjustModelOutputTensorInitRefCount(model); | AdjustModelOutputTensorInitRefCount(model); | ||||
| for (auto kernel : this->kernels_) { | |||||
| auto ret = kernel->Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret; | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -28,7 +28,7 @@ namespace mindspore::kernel { | |||||
| int MergeCPUKernel::FreeInWorkTensor() const { | int MergeCPUKernel::FreeInWorkTensor() const { | ||||
| for (auto &in_tensor : this->in_tensors_) { | for (auto &in_tensor : this->in_tensors_) { | ||||
| MS_ASSERT(in_tensor != nullptr); | MS_ASSERT(in_tensor != nullptr); | ||||
| if (in_tensor->IsConst()) { | |||||
| if (in_tensor->IsConst() || in_tensor->IsGraphInput()) { | |||||
| continue; | continue; | ||||
| } | } | ||||
| if (in_tensor->ref_count() > 0) { | if (in_tensor->ref_count() > 0) { | ||||
| @@ -50,11 +50,13 @@ bool MergeCPUKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) { | |||||
| MS_ASSERT(in_tensors().size() == 2 * out_tensors().size()); | MS_ASSERT(in_tensors().size() == 2 * out_tensors().size()); | ||||
| return std::all_of(this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2, | return std::all_of(this->in_tensors().begin(), this->in_tensors().begin() + in_tensors().size() / 2, | ||||
| [&](lite::Tensor *kernel_in_tensor) { | [&](lite::Tensor *kernel_in_tensor) { | ||||
| return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1; | |||||
| return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || | |||||
| kernel_in_tensor->ref_count() >= 1; | |||||
| }) || | }) || | ||||
| std::all_of(this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(), | std::all_of(this->in_tensors().begin() + in_tensors().size() / 2, this->in_tensors().end(), | ||||
| [&](lite::Tensor *kernel_in_tensor) { | [&](lite::Tensor *kernel_in_tensor) { | ||||
| return kernel_in_tensor->IsConst() || kernel_in_tensor->ref_count() >= 1; | |||||
| return kernel_in_tensor->IsConst() || kernel_in_tensor->IsGraphInput() || | |||||
| kernel_in_tensor->ref_count() >= 1; | |||||
| }); | }); | ||||
| } | } | ||||
| @@ -223,19 +223,7 @@ int OpenCLSubGraph::Init() { | |||||
| nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end()); | nodes_.insert(nodes_.end(), out_convert_ops_.begin(), out_convert_ops_.end()); | ||||
| GetInOutNodes(); | GetInOutNodes(); | ||||
| UpdateTensorDataType(); | UpdateTensorDataType(); | ||||
| ret = SubGraphKernel::Prepare(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "OpenCL prepare fail"; | |||||
| return ret; | |||||
| } | |||||
| auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_); | |||||
| // If tuning_mode is DEFAULT, just malloc memory for reuse. | |||||
| ret = opencl_exec->RunOrTune(in_tensors_, out_tensors_, nodes_, allocator_, nullptr, nullptr, true); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Run opencl executor failed: " << ret; | |||||
| return ret; | |||||
| } | |||||
| Fusion(); | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| @@ -307,10 +295,16 @@ int OpenCLSubGraph::Prepare() { | |||||
| MS_LOG(ERROR) << "Create OpenCLExecutor fail"; | MS_LOG(ERROR) << "Create OpenCLExecutor fail"; | ||||
| return RET_ERROR; | return RET_ERROR; | ||||
| } | } | ||||
| Fusion(); | |||||
| auto ret = Init(); | |||||
| auto ret = SubGraphKernel::Prepare(); | |||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| MS_LOG(ERROR) << "OpenCL subgraph init fail"; | |||||
| MS_LOG(ERROR) << "OpenCL prepare fail"; | |||||
| return ret; | |||||
| } | |||||
| auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_); | |||||
| // If tuning_mode is DEFAULT, just malloc memory for reuse. | |||||
| ret = opencl_exec->RunOrTune(in_tensors_, out_tensors_, nodes_, allocator_, nullptr, nullptr, true); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Run opencl executor failed: " << ret; | |||||
| return ret; | return ret; | ||||
| } | } | ||||
| return RET_OK; | return RET_OK; | ||||
| @@ -79,6 +79,11 @@ int OpenCLExecutor::RunOrTune(std::vector<Tensor *> &inputs, std::vector<Tensor | |||||
| MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name(); | MS_LOG(ERROR) << "tuning kernel failed, name: " << kernel->name(); | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| ret = kernel->PostProcess(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "PostProcess kernel failed, name: " << kernel->name(); | |||||
| return ret; | |||||
| } | |||||
| } else { | } else { | ||||
| ret = kernel->Run(); | ret = kernel->Run(); | ||||
| if (ret != RET_OK) { | if (ret != RET_OK) { | ||||
| @@ -375,6 +375,13 @@ int Scheduler::ConstructSubGraphs(std::vector<kernel::LiteKernel *> *kernels) { | |||||
| } | } | ||||
| kernels->emplace_back(subgraph); | kernels->emplace_back(subgraph); | ||||
| } | } | ||||
| for (auto *subgraph : *kernels) { | |||||
| auto ret = subgraph->Init(); | |||||
| if (ret != RET_OK) { | |||||
| MS_LOG(ERROR) << "Init SubGraph failed: " << ret; | |||||
| return ret; | |||||
| } | |||||
| } | |||||
| return RET_OK; | return RET_OK; | ||||
| } | } | ||||
| bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel, | bool Scheduler::MergeOpIsReady(const kernel::LiteKernel *kernel, | ||||
| @@ -407,12 +414,16 @@ kernel::SubGraphKernel *Scheduler::CreateSubGraphKernel(const std::vector<kernel | |||||
| } | } | ||||
| std::vector<Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); | std::vector<Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); | ||||
| std::vector<Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels); | std::vector<Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels); | ||||
| std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels); | |||||
| std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels); | |||||
| std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); | |||||
| std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); | |||||
| if (type == kernel::kGpuSubGraph) { | if (type == kernel::kGpuSubGraph) { | ||||
| #if SUPPORT_GPU | #if SUPPORT_GPU | ||||
| auto sub_kernel = new (std::nothrow) | auto sub_kernel = new (std::nothrow) | ||||
| kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); | kernel::OpenCLSubGraph(input_tensors, output_tensors, input_kernels, output_kernels, kernels, context_); | ||||
| if (sub_kernel == nullptr) { | |||||
| MS_LOG(ERROR) << "Create OpenCLSubGraph failed"; | |||||
| return nullptr; | |||||
| } | |||||
| return sub_kernel; | return sub_kernel; | ||||
| #else | #else | ||||
| return nullptr; | return nullptr; | ||||
| @@ -102,6 +102,8 @@ class SubGraphKernel : public LiteKernel { | |||||
| void InitOutTensorInitRefCount() override; | void InitOutTensorInitRefCount() override; | ||||
| int Init() override { return mindspore::lite::RET_OK; } | |||||
| std::string ToString() const override; | std::string ToString() const override; | ||||
| std::vector<LiteKernel *> nodes() { return this->nodes_; } | std::vector<LiteKernel *> nodes() { return this->nodes_; } | ||||
| @@ -146,7 +148,7 @@ class CpuFp32SubGraph : public CpuSubGraph { | |||||
| } | } | ||||
| ~CpuFp32SubGraph() override = default; | ~CpuFp32SubGraph() override = default; | ||||
| int Init() override { return mindspore::lite::RET_ERROR; } | |||||
| int Init() override { return CpuSubGraph::Init(); } | |||||
| int PreProcess() override { return CpuSubGraph::PreProcess(); } | int PreProcess() override { return CpuSubGraph::PreProcess(); } | ||||
| int Run() override { return CpuSubGraph::Run(); } | int Run() override { return CpuSubGraph::Run(); } | ||||
| int Run(const KernelCallBack &before, const KernelCallBack &after) override { | int Run(const KernelCallBack &before, const KernelCallBack &after) override { | ||||
| @@ -166,7 +168,7 @@ class CpuFp16SubGraph : public CpuSubGraph { | |||||
| } | } | ||||
| ~CpuFp16SubGraph() override = default; | ~CpuFp16SubGraph() override = default; | ||||
| int Init() override { return mindspore::lite::RET_ERROR; } | |||||
| int Init() override { return CpuSubGraph::Init(); } | |||||
| int PreProcess() override; | int PreProcess() override; | ||||
| int Run() override { return CpuSubGraph::Run(); } | int Run() override { return CpuSubGraph::Run(); } | ||||
| int Run(const KernelCallBack &before, const KernelCallBack &after) override { | int Run(const KernelCallBack &before, const KernelCallBack &after) override { | ||||
| @@ -328,12 +328,6 @@ void *Tensor::MutableData() { | |||||
| return this->data_; | return this->data_; | ||||
| } | } | ||||
| bool Tensor::IsConst() { | |||||
| return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; | |||||
| } | |||||
| bool Tensor::IsScalar() { return this->category_ == CONST_SCALAR && this->data_ != nullptr; } | |||||
| void Tensor::AddQuantParam(const QuantArg &quant_arg) { this->quant_params_.push_back(quant_arg); } | void Tensor::AddQuantParam(const QuantArg &quant_arg) { this->quant_params_.push_back(quant_arg); } | ||||
| std::vector<QuantArg> Tensor::quant_params() const { return this->quant_params_; } | std::vector<QuantArg> Tensor::quant_params() const { return this->quant_params_; } | ||||
| @@ -45,7 +45,8 @@ class Tensor : public mindspore::tensor::MSTensor { | |||||
| enum Category { | enum Category { | ||||
| CONST_TENSOR, // weight tensor | CONST_TENSOR, // weight tensor | ||||
| CONST_SCALAR, // weight scalar | CONST_SCALAR, // weight scalar | ||||
| VAR // activation tensor | |||||
| VAR, // activation tensor | |||||
| GRAPH_INPUT, | |||||
| }; | }; | ||||
| Tensor() = default; | Tensor() = default; | ||||
| @@ -102,11 +103,13 @@ class Tensor : public mindspore::tensor::MSTensor { | |||||
| virtual void set_data(void *data) { this->data_ = data; } | virtual void set_data(void *data) { this->data_ = data; } | ||||
| Category category() { return this->category_; } | |||||
| Category category() const { return this->category_; } | |||||
| void set_category(Category category) { this->category_ = category; } | |||||
| void set_format(schema::Format format) { this->format_ = format; } | void set_format(schema::Format format) { this->format_ = format; } | ||||
| schema::Format format() { return this->format_; } | |||||
| schema::Format format() const { return this->format_; } | |||||
| size_t ref_count() const { return this->ref_count_; } | size_t ref_count() const { return this->ref_count_; } | ||||
| @@ -130,9 +133,13 @@ class Tensor : public mindspore::tensor::MSTensor { | |||||
| void set_quant_clusters(const std::vector<float> &clusters); | void set_quant_clusters(const std::vector<float> &clusters); | ||||
| bool IsConst(); | |||||
| bool IsConst() const { | |||||
| return (this->category_ == CONST_TENSOR || this->category_ == CONST_SCALAR) && this->data_ != nullptr; | |||||
| } | |||||
| bool IsScalar() const { return this->category_ == CONST_SCALAR && this->data_ != nullptr; } | |||||
| bool IsScalar(); | |||||
| bool IsGraphInput() const { return this->category_ == GRAPH_INPUT; } | |||||
| void Prepare() { | void Prepare() { | ||||
| if (allocator_ != nullptr) { | if (allocator_ != nullptr) { | ||||
| @@ -56,9 +56,9 @@ TEST_F(UtilsTest, TestSubgraph) { | |||||
| std::vector<kernel::LiteKernel *> kernels = {kernel0.get(), kernel1.get(), kernel2.get()}; | std::vector<kernel::LiteKernel *> kernels = {kernel0.get(), kernel1.get(), kernel2.get()}; | ||||
| auto input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels); | |||||
| auto input_kernels = kernel::LiteKernelUtil::SubgraphInputNodes(kernels); | |||||
| ASSERT_EQ(input_kernels.size(), 1); | ASSERT_EQ(input_kernels.size(), 1); | ||||
| auto output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels); | |||||
| auto output_kernels = kernel::LiteKernelUtil::SubgraphOutputNodes(kernels); | |||||
| ASSERT_EQ(output_kernels.size(), 1); | ASSERT_EQ(output_kernels.size(), 1); | ||||
| auto input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); | auto input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels); | ||||
| ASSERT_EQ(input_tensors.size(), 2); | ASSERT_EQ(input_tensors.size(), 2); | ||||