scheduler.cc
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/scheduler.h"
#include <vector>
#include <string>
#include <algorithm>
#include "include/errorcode.h"
#include "src/kernel_registry.h"
#include "src/common/graph_util.h"
#include "src/common/utils.h"
#if SUPPORT_GPU
#include "src/runtime/kernel/opencl/subgraph_opencl_kernel.h"
#include "src/runtime/opencl/opencl_runtime.h"
#endif

namespace mindspore::lite {
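// Entry point of scheduling: infers tensor shapes for every node, converts each
// node into a LiteKernel, topologically sorts the kernels, and finally groups
// consecutive kernels of the same architecture into subgraphs.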
int Scheduler::Schedule(const lite::Model *model, std::vector<Tensor *> *tensors,
                        std::vector<kernel::LiteKernel *> *kernels) {
  // 1. op ---> kernel
  // 2. sub graph
  // 3. kernels (kernels --> subGraph)
  int ret = InferShape(model, tensors);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "op infer shape failed.";
    return RET_ERROR;
  }
  ret = InitOp2Kernel(model, tensors, kernels);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "init op to kernel failed.";
    return RET_ERROR;
  }
  kernel::LiteKernelUtil::TopologicalSortKernels(*kernels);
  ConstructSubgraphs(kernels);
  MS_LOG(DEBUG) << "schedule kernels success.";
  return RET_OK;
}
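// Re-runs shape inference for already-scheduled kernels (e.g. after input shapes
// changed) and calls ReSize() on each kernel. Once one primitive reports
// RET_INFER_INVALID, inference is marked as interrupted and the remaining kernels
// are not resized.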
int Scheduler::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) {
  bool infer_shape_interrupt = false;
  for (size_t i = 0; i < kernels.size(); ++i) {
    if (kernels[i] == nullptr) {
      MS_LOG(ERROR) << "input kernel is nullptr!";
      return RET_ERROR;
    }
    auto primitive = const_cast<mindspore::lite::PrimitiveC *>(kernels[i]->GetPrimitive());
    if (primitive == nullptr) {
      MS_LOG(ERROR) << "kernel(" << kernels[i]->name() << ")'s primitive is nullptr!";
      return RET_ERROR;
    }
    std::vector<Tensor *> &inputs = kernels[i]->in_tensors();
    std::vector<Tensor *> &outputs = kernels[i]->out_tensors();
    for (size_t j = 0; j < outputs.size(); j++) {
      outputs[j]->FreeData();
    }
    primitive->SetInferFlag(!infer_shape_interrupt);
    auto ret = primitive->InferShape(inputs, outputs);
    if (ret == RET_INFER_INVALID) {
      MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type: "
                   << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()))
                   << ", flag set to false.";
      primitive->SetInferFlag(false);
      infer_shape_interrupt = true;
    } else if (ret != RET_OK) {
      MS_LOG(ERROR) << "InferShape failed, type: "
                    << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()));
      return RET_INFER_ERR;
    }
    if (!infer_shape_interrupt) {
      ret = kernels[i]->ReSize();
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "kernel " << kernels[i]->name() << " resize failed, ret = " << ret;
        return ret;
      }
    }
  }
  return RET_OK;
}
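// Runs shape inference for every node of the model before kernels exist. Input and
// output tensors are looked up through the node's tensor indices. RET_INFER_INVALID
// means the shape can only be known at runtime; in that case the infer flag is
// cleared for all subsequent nodes instead of failing.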
int Scheduler::InferShape(const lite::Model *model, std::vector<Tensor *> *tensors) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(tensors != nullptr);
  bool infer_shape_interrupt = false;
  uint32_t kernelCount = model->nodes_.size();
  for (uint32_t i = 0; i < kernelCount; ++i) {
    auto node = model->nodes_[i];
    MS_ASSERT(node != nullptr);
    std::vector<Tensor *> inputs;
    std::vector<Tensor *> outputs;
    auto in_size = node->input_indices_.size();
    for (size_t j = 0; j < in_size; ++j) {
      inputs.emplace_back(tensors->at(node->input_indices_[j]));
    }
    auto out_size = node->output_indices_.size();
    for (size_t j = 0; j < out_size; ++j) {
      outputs.emplace_back(tensors->at(node->output_indices_[j]));
    }
    auto *primitive = node->primitive_;
    if (primitive == nullptr) {
      MS_LOG(ERROR) << "Op " << node->name_ << " should exist in model!";
      return RET_ERROR;
    }
    primitive->SetInferFlag(!infer_shape_interrupt);
    auto ret = primitive->InferShape(inputs, outputs);
    if (ret == RET_INFER_INVALID) {
      MS_LOG(INFO) << "InferShape shouldn't be done before runtime, name: " << node->name_
                   << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()))
                   << ", flag set to false.";
      primitive->SetInferFlag(false);
      infer_shape_interrupt = true;
    } else if (ret != RET_OK) {
      MS_LOG(ERROR) << "InferShape failed, name: " << node->name_ << ", type: "
                    << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()));
      return RET_INFER_ERR;
    }
  }
  return RET_OK;
}
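// Creates one LiteKernel per model node: the node's input/output tensors are
// collected, ScheduleNode() picks a concrete kernel implementation, and the kernel
// is tagged with its name and whether it produces a graph output.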
int Scheduler::InitOp2Kernel(const lite::Model *model, std::vector<Tensor *> *tensors,
                             std::vector<kernel::LiteKernel *> *kernels) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(tensors != nullptr);
  uint32_t kernelCount = model->nodes_.size();
  auto graph_output_node_indexes = GetGraphOutputNodes(model);
  for (uint32_t i = 0; i < kernelCount; ++i) {
    auto node = model->nodes_[i];
    MS_ASSERT(node != nullptr);
    std::vector<Tensor *> inputs;
    std::vector<Tensor *> outputs;
    auto in_size = node->input_indices_.size();
    for (size_t j = 0; j < in_size; ++j) {
      inputs.emplace_back(tensors->at(node->input_indices_[j]));
    }
    auto out_size = node->output_indices_.size();
    for (size_t j = 0; j < out_size; ++j) {
      outputs.emplace_back(tensors->at(node->output_indices_[j]));
    }
    auto *primitive = node->primitive_;
    MS_ASSERT(primitive != nullptr);
    auto *kernel = this->ScheduleNode(inputs, outputs, primitive, node);
    if (kernel == nullptr) {
      MS_LOG(ERROR) << "ScheduleNode return nullptr, name: " << node->name_ << ", type: "
                    << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type()));
      return RET_ERROR;
    }
    SetKernelTensorDataType(kernel);
    kernel->set_name(node->name_);
    kernel->set_is_model_output(IsContain(graph_output_node_indexes, size_t(i)));
    kernels->emplace_back(kernel);
  }
  return RET_OK;
}
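// Splits the topologically sorted kernel list into runs of consecutive kernels that
// share the same architecture. CPU runs are kept as individual kernels (their output
// tensors just receive the context allocator), while GPU/NPU runs are wrapped into a
// single subgraph kernel via CreateSubKernel(). The input kernel list is replaced by
// the resulting list.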
void Scheduler::ConstructSubgraphs(std::vector<kernel::LiteKernel *> *kernels) {
  uint32_t kernel_count = kernels->size();
  std::vector<kernel::LiteKernel *> sub_kernels;
  std::vector<std::vector<kernel::LiteKernel *>> sub_kernels_list;
  kernel::KERNEL_ARCH prev_arch = kernels->front()->desc().arch;
  for (uint32_t i = 0; i < kernel_count; ++i) {
    auto curr_kernel = kernels->at(i);
    auto curr_arch = curr_kernel->desc().arch;
    if (curr_arch == prev_arch) {
      sub_kernels.emplace_back(curr_kernel);
    }
    if ((curr_arch != prev_arch) || (i == kernel_count - 1)) {
      sub_kernels_list.emplace_back(sub_kernels);
      sub_kernels.clear();
      sub_kernels.emplace_back(curr_kernel);
    }
    prev_arch = curr_arch;
  }
  std::vector<kernel::LiteKernel *> subgraph_kernels;
  size_t sub_cnt{0};
  for (auto temp_kernels : sub_kernels_list) {
    std::vector<Tensor *> output_tensor = kernel::LiteKernelUtil::SubgraphOutputTensors(temp_kernels);
    for (auto tensor : output_tensor) {
      if (context_->float16_priority && tensor->data_type() == kNumberTypeFloat16) {
        tensor->set_data_type(kNumberTypeFloat32);
      }
    }
    kernel::KERNEL_ARCH arch = temp_kernels.front()->desc().arch;
    if (arch == kernel::KERNEL_ARCH::kCPU) {
      for (auto kernel : temp_kernels) {
        for (auto tensor : kernel->out_tensors()) {
          tensor->set_allocator(context_->allocator.get());
        }
      }
      std::copy(temp_kernels.begin(), temp_kernels.end(), std::back_inserter(subgraph_kernels));
    } else {
      auto subgraph_kernel = CreateSubKernel(temp_kernels, arch);
      subgraph_kernels.emplace_back(subgraph_kernel);
      std::string arch_name = (arch == kernel::KERNEL_ARCH::kGPU) ? "GPU" : "NPU";
      MS_LOG(INFO) << arch_name << " subgraph id " << sub_cnt << " created.";
    }
    ++sub_cnt;
  }
  kernels->clear();
  kernels->insert(kernels->begin(), subgraph_kernels.begin(), subgraph_kernels.end());
}
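// Wraps a run of same-architecture kernels into a single subgraph kernel. Only the
// GPU (OpenCL) path is implemented when SUPPORT_GPU is defined; for NPU or other
// architectures (or without GPU support) nullptr is returned.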
kernel::LiteKernel *Scheduler::CreateSubKernel(const std::vector<kernel::LiteKernel *> &kernels,
                                               kernel::KERNEL_ARCH arch) {
  kernel::LiteKernel *sub_kernel = nullptr;
#if SUPPORT_GPU
  if (arch == kernel::KERNEL_ARCH::kGPU) {
    std::vector<Tensor *> input_tensors = kernel::LiteKernelUtil::SubgraphInputTensors(kernels);
    std::vector<Tensor *> output_tensors = kernel::LiteKernelUtil::SubgraphOutputTensors(kernels);
    std::vector<kernel::LiteKernel *> input_kernels = kernel::LiteKernelUtil::SubgraphInputKernels(kernels);
    std::vector<kernel::LiteKernel *> output_kernels = kernel::LiteKernelUtil::SubgraphOutputKernels(kernels);
    sub_kernel = new kernel::SubGraphOpenCLKernel(input_tensors, output_tensors, input_kernels, output_kernels,
                                                  kernels, context_, nullptr);
    sub_kernel->Init();
  } else if (arch == kernel::KERNEL_ARCH::kNPU) {
    MS_LOG(ERROR) << "NPU kernel is not supported";
  } else {
    MS_LOG(ERROR) << "unsupported kernel arch: " << arch;
  }
#endif
  return sub_kernel;
}
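// Selects a concrete kernel implementation for one node. A GPU kernel is tried first
// when the context targets a GPU device; otherwise (or if no GPU kernel is
// registered) the scheduler falls back to CPU, preferring an fp16 kernel when
// float16_priority is set and then an fp32 kernel. Returns nullptr if nothing in the
// registry matches.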
kernel::LiteKernel *Scheduler::ScheduleNode(const std::vector<Tensor *> &in_tensors,
                                            const std::vector<Tensor *> &out_tensors,
                                            const mindspore::lite::PrimitiveC *primitive, const Model::Node *node) {
  MS_ASSERT(primitive != nullptr);
  TypeId data_type = GetFirstFp32Fp16OrInt8Type(in_tensors);
  kernel::KernelKey desc{kernel::KERNEL_ARCH::kCPU, data_type, static_cast<schema::PrimitiveType>(primitive->Type())};
#if SUPPORT_GPU
  if (context_->device_type_ == DT_GPU) {
    desc.arch = kernel::KERNEL_ARCH::kGPU;
    auto *kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc);
    if (kernel != nullptr) {
      kernel->set_desc(desc);
      return kernel;
    } else {
      MS_LOG(ERROR) << "Not supported GPU Op "
                    << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(primitive->Type())) << " "
                    << node->name_;
    }
  }
#endif
  desc.arch = kernel::KERNEL_ARCH::kCPU;
  kernel::LiteKernel *kernel = nullptr;
  if ((context_->float16_priority && data_type == kNumberTypeFloat32) || data_type == kNumberTypeFloat16) {
    // check if support fp16
    kernel::KernelKey key{desc.arch, kNumberTypeFloat16, desc.type};
    kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, key);
    if (kernel != nullptr) {
      MS_LOG(DEBUG) << "Get fp16 op success.";
      desc.data_type = kNumberTypeFloat16;
      kernel->set_desc(desc);
      return kernel;
    }
    MS_LOG(DEBUG) << "Get fp16 op failed, back to fp32 op.";
  }
  if (data_type == kNumberTypeFloat16) {
    desc.data_type = kNumberTypeFloat32;
  }
  kernel = KernelRegistry::GetInstance()->GetKernel(in_tensors, out_tensors, primitive, context_, desc);
  if (kernel != nullptr) {
    kernel->set_desc(desc);
    return kernel;
  }
  return nullptr;
}
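// Returns the data type of the first input tensor that is fp32, fp16 or int8;
// defaults to fp32 when no such input exists.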
TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors) {
  for (const auto &tensor : in_tensors) {
    auto dtype = tensor->data_type();
    if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8) {
      return dtype;
    }
  }
  return kNumberTypeFloat32;
}
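// Aligns tensor data types of a CPU kernel with the data type it was scheduled for:
// an fp16 kernel marks its fp32 outputs as fp16, while an fp32 kernel forces its
// non-constant fp16 inputs and its fp16 outputs back to fp32.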
void Scheduler::SetKernelTensorDataType(kernel::LiteKernel *kernel) {
  if (kernel->desc().arch != kernel::KERNEL_ARCH::kCPU) {
    return;
  }
  if (kernel->desc().data_type == kNumberTypeFloat16) {
    for (auto tensor : kernel->out_tensors()) {
      if (tensor->data_type() == kNumberTypeFloat32) {
        tensor->set_data_type(kNumberTypeFloat16);
      }
    }
  } else if (kernel->desc().data_type == kNumberTypeFloat32) {
    for (auto tensor : kernel->in_tensors()) {
      if (tensor->category() != Tensor::Category::CONST && tensor->data_type() == kNumberTypeFloat16) {
        tensor->set_data_type(kNumberTypeFloat32);
      }
    }
    for (auto tensor : kernel->out_tensors()) {
      if (tensor->data_type() == kNumberTypeFloat16) {
        tensor->set_data_type(kNumberTypeFloat32);
      }
    }
  }
}
}  // namespace mindspore::lite