You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

cpu_session.cc 11 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "backend/session/cpu_session.h"
  17. #include <algorithm>
  18. #include <sstream>
  19. #include <exception>
  20. #include "ir/anf.h"
  21. #include "utils/ms_utils.h"
  22. #include "utils/trace_base.h"
  23. #include "backend/session/anf_runtime_algorithm.h"
  24. #include "runtime/device/kernel_runtime.h"
  25. #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
  26. #include "runtime/device/cpu/kernel_select_cpu.h"
  27. #include "backend/optimizer/common/optimizer.h"
  28. #include "backend/optimizer/common/pass_manager.h"
  29. #include "backend/optimizer/pass/replace_node_by_proxy.h"
  30. #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  31. #include "ps/util.h"
  32. #endif
  33. namespace mindspore {
  34. namespace session {
  35. ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf, KernelGraph *graph) {
  36. MS_EXCEPTION_IF_NULL(anf);
  37. MS_EXCEPTION_IF_NULL(graph);
  38. if (!anf->isa<Parameter>()) {
  39. MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter";
  40. }
  41. auto valid_inputs = graph->MutableValidInputs();
  42. MS_EXCEPTION_IF_NULL(valid_inputs);
  43. auto graph_inputs = graph->MutableInputs();
  44. MS_EXCEPTION_IF_NULL(graph_inputs);
  45. TraceManager::DebugTrace(std::make_shared<TraceCopy>(anf->debug_info()));
  46. ParameterPtr new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
  47. TraceManager::EndTrace();
  48. graph_inputs->push_back(new_parameter);
  49. valid_inputs->push_back(true);
  50. return new_parameter;
  51. }
  52. // Remove after PS feature finish adapting push/pull in auto_monad.
  53. void CPUSession::Reorder(std::vector<CNodePtr> *node_list) { AnfAlgo::ReorderPosteriorExecList(NOT_NULL(node_list)); }
  54. void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
  55. auto optimizer = std::make_shared<opt::GraphOptimizer>();
  56. auto pm = std::make_shared<opt::PassManager>();
  57. std::string pass_name = "replace_node_by_proxy";
  58. pass_name.append(std::to_string(graph_sum_));
  59. pm->AddPass(std::make_shared<opt::ReplaceNodeByProxy>(pass_name));
  60. optimizer->AddPassManager(pm);
  61. (void)optimizer->Optimize(kernel_graph);
  62. kernel_graph->SetExecOrderByDefault();
  63. }
  64. GraphId CPUSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
  65. auto graph_id = graph_sum_;
  66. auto graph = ConstructKernelGraph(lst, outputs);
  67. MS_EXCEPTION_IF_NULL(graph);
  68. UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
  69. graph->UpdateGraphDynamicAttr();
  70. MS_LOG(INFO) << "Set kernel info";
  71. SetKernelInfo(graph.get());
  72. #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  73. if (ps::Util::IsParamServerMode()) {
  74. AssignParamKey(graph);
  75. if (ps::Util::IsRoleOfWorker()) {
  76. Optimize(graph);
  77. }
  78. }
  79. #endif
  80. MS_LOG(INFO) << "Build kernel";
  81. BuildKernel(graph.get());
  82. // Remove reorder after PS feature finish adapting push/pull in auto_monad.
  83. auto execution_order = graph->execution_order();
  84. Reorder(&execution_order);
  85. graph->set_execution_order(execution_order);
  86. // runtime init
  87. if (!runtime_.Init()) {
  88. MS_LOG(EXCEPTION) << "Kernel runtime init error.";
  89. }
  90. MS_LOG(INFO) << "Assign kernel address";
  91. runtime_.AssignKernelAddress(graph.get());
  92. return graph_id;
  93. }
  94. void CPUSession::CreateOutputTensors(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &input_tensors,
  95. VectorRef *outputs,
  96. std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
  97. auto kernel_graph = GetGraph(graph_id);
  98. MS_EXCEPTION_IF_NULL(kernel_graph);
  99. runtime_.CreateOutputTensors(kernel_graph.get(), input_tensors, outputs, tensor_to_node);
  100. }
  101. void CPUSession::SyncValueNodeDeviceAddr(const std::shared_ptr<KernelGraph> &kernel_graph) {
  102. auto context_ptr = MsContext::GetInstance();
  103. MS_EXCEPTION_IF_NULL(context_ptr);
  104. if (context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
  105. return;
  106. }
  107. runtime_.SyncValueNodeDeviceAddr(kernel_graph.get());
  108. }
  109. void CPUSession::RunGraphImpl(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs,
  110. VectorRef *outputs) {
  111. auto kernel_graph = GetGraph(graph_id);
  112. MS_EXCEPTION_IF_NULL(kernel_graph);
  113. SyncValueNodeDeviceAddr(kernel_graph);
  114. MS_LOG(INFO) << "Bind input output address";
  115. runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs);
  116. #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  117. InitPSParamAndOptim(kernel_graph, inputs);
  118. #endif
  119. MS_LOG(INFO) << "Run graph start";
  120. bool enable_summary = summary_callback_ != nullptr;
  121. NamedSummaryOutputs summary_outputs;
  122. if (enable_summary) {
  123. SetSummaryNodes(kernel_graph.get());
  124. summary_outputs = kernel_graph->summary_nodes();
  125. runtime_.IncreaseSummaryRefCount(summary_outputs);
  126. }
  127. bool ret = runtime_.Run(kernel_graph.get(), false);
  128. if (!ret) {
  129. MS_LOG(EXCEPTION) << "Run graph failed";
  130. }
  131. if (enable_summary) {
  132. Summary(kernel_graph.get());
  133. runtime_.DecreaseSummaryRefCount(summary_outputs);
  134. }
  135. MS_LOG(INFO) << "Run graph end";
  136. }
  137. void CPUSession::BuildOpImpl(const OpRunInfo &op_run_info, const GraphInfo &graph_info,
  138. const std::vector<tensor::TensorPtr> &input_tensors,
  139. const std::vector<int64_t> &tensors_mask) {
  140. // Check if the graph cache exists.
  141. if (run_op_graphs_.find(graph_info) != run_op_graphs_.end()) {
  142. return;
  143. }
  144. // Prepare the graph
  145. auto kernel_graph = ConstructSingleOpGraph(op_run_info, input_tensors, tensors_mask);
  146. MS_EXCEPTION_IF_NULL(kernel_graph);
  147. SetKernelInfo(kernel_graph.get());
  148. BuildKernel(kernel_graph.get());
  149. run_op_graphs_[graph_info] = kernel_graph;
  150. }
  151. void CPUSession::SetOutputFlags(const VectorRef &base_ref, std::vector<tensor::TensorPtr> *outputs_tensors) {
  152. for (size_t i = 0; i < base_ref.size(); ++i) {
  153. if (utils::isa<VectorRef>(base_ref[i])) {
  154. auto ref_iter = utils::cast<VectorRef>(base_ref[i]);
  155. SetOutputFlags(ref_iter, outputs_tensors);
  156. } else if (utils::isa<tensor::TensorPtr>(base_ref[i])) {
  157. auto tensor_ptr = utils::cast<std::shared_ptr<tensor::Tensor>>(base_ref[i]);
  158. tensor_ptr->SetNeedWait(false);
  159. tensor_ptr->data_sync(false);
  160. outputs_tensors->push_back(tensor_ptr);
  161. }
  162. }
  163. }
  164. void CPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
  165. std::vector<tensor::TensorPtr> *input_tensors, VectorRef *outputs,
  166. const std::vector<int64_t> &tensors_mask) {
  167. MS_EXCEPTION_IF_NULL(input_tensors);
  168. MS_EXCEPTION_IF_NULL(op_run_info);
  169. BuildOpImpl(*op_run_info, graph_info, *input_tensors, tensors_mask);
  170. EraseValueNodeTensor(tensors_mask, input_tensors);
  171. auto kernel_graph = run_op_graphs_[graph_info];
  172. MS_EXCEPTION_IF_NULL(kernel_graph);
  173. // Remove reorder after PS feature finish adapting push/pull in auto_monad.
  174. auto execution_order = kernel_graph->execution_order();
  175. Reorder(&execution_order);
  176. kernel_graph->set_execution_order(execution_order);
  177. // runtime init
  178. if (!runtime_.Init()) {
  179. MS_LOG(EXCEPTION) << "Kernel runtime init error.";
  180. }
  181. runtime_.AssignKernelAddress(kernel_graph.get());
  182. std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
  183. runtime_.CreateOutputTensors(kernel_graph.get(), *input_tensors, outputs, &tensor_to_node);
  184. runtime_.BindInputOutput(kernel_graph.get(), *input_tensors, outputs);
  185. MS_LOG(INFO) << "Run Op start";
  186. bool ret = runtime_.Run(kernel_graph.get(), false);
  187. if (!ret) {
  188. MS_LOG(EXCEPTION) << "Run Op failed";
  189. }
  190. std::vector<tensor::TensorPtr> output_tensors;
  191. SetOutputFlags(*outputs, &output_tensors);
  192. MS_LOG(INFO) << "Run Op end";
  193. }
  194. void CPUSession::SetKernelInfo(const KernelGraph *kernel_graph) {
  195. MS_EXCEPTION_IF_NULL(kernel_graph);
  196. auto &kernel_nodes = kernel_graph->execution_order();
  197. for (const auto &kernel_node : kernel_nodes) {
  198. MS_EXCEPTION_IF_NULL(kernel_node);
  199. device::cpu::SetKernelInfo(kernel_node);
  200. }
  201. }
  202. namespace {
  203. void KernelNotSupportException(const AnfNodePtr &kernel_node) {
  204. std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  205. std::stringstream operator_info;
  206. operator_info << "Operator[" << kernel_name << "] ";
  207. auto kernel_info = dynamic_cast<device::KernelInfo *>(kernel_node->kernel_info());
  208. if (kernel_info == nullptr) {
  209. operator_info << "is not support.";
  210. MS_LOG(EXCEPTION) << operator_info.str();
  211. }
  212. auto kernel_build_Info = kernel_info->select_kernel_build_info();
  213. if (kernel_build_Info == nullptr) {
  214. operator_info << "is not support.";
  215. MS_LOG(EXCEPTION) << operator_info.str();
  216. }
  217. size_t input_num = kernel_build_Info->GetInputNum();
  218. if (input_num > 0) {
  219. operator_info << " input(";
  220. for (size_t i = 0; i < input_num; ++i) {
  221. operator_info << TypeIdLabel(kernel_build_Info->GetInputDeviceType(i));
  222. if (i != input_num - 1) {
  223. operator_info << ",";
  224. }
  225. }
  226. operator_info << ") ";
  227. }
  228. size_t output_num = kernel_build_Info->GetOutputNum();
  229. if (output_num > 0) {
  230. operator_info << "output(";
  231. for (size_t i = 0; i < output_num; ++i) {
  232. operator_info << TypeIdLabel(kernel_build_Info->GetOutputDeviceType(i));
  233. if (i != kernel_build_Info->GetOutputNum() - 1) {
  234. operator_info << ",";
  235. }
  236. }
  237. operator_info << ") ";
  238. }
  239. operator_info << "is not support.";
  240. MS_LOG(EXCEPTION) << operator_info.str() << " Trace: " << trace::DumpSourceLines(kernel_node);
  241. }
  242. } // namespace
  243. void CPUSession::BuildKernel(const KernelGraph *kernel_graph) {
  244. MS_EXCEPTION_IF_NULL(kernel_graph);
  245. auto &kernel_nodes = kernel_graph->execution_order();
  246. for (const auto &kernel_node : kernel_nodes) {
  247. MS_EXCEPTION_IF_NULL(kernel_node);
  248. std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  249. MS_LOG(INFO) << "Cpu building operator[" << kernel_name << "].";
  250. std::shared_ptr<kernel::CPUKernel> cpu_kernel =
  251. kernel::CPUKernelFactory::GetInstance().Create(kernel_name, kernel_node);
  252. if (cpu_kernel == nullptr) {
  253. KernelNotSupportException(kernel_node);
  254. }
  255. try {
  256. cpu_kernel->Init(kernel_node);
  257. } catch (std::exception &e) {
  258. MS_LOG(EXCEPTION) << e.what() << "\nTrace: " << trace::DumpSourceLines(kernel_node);
  259. }
  260. AnfAlgo::SetKernelMod(cpu_kernel, kernel_node.get());
  261. MS_LOG(INFO) << "Cpu build success operator[" << kernel_name << "].";
  262. }
  263. }
  264. } // namespace session
  265. } // namespace mindspore