You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

cpu_session.cc 7.5 kB

5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. /**
  2. * Copyright 2019-2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "backend/session/cpu_session.h"
  17. #include <algorithm>
  18. #include <sstream>
  19. #include "ir/tensor.h"
  20. #include "ir/anf.h"
  21. #include "backend/kernel_compiler/kernel.h"
  22. #include "common/utils.h"
  23. #include "backend/session/anf_runtime_algorithm.h"
  24. #include "runtime/device/kernel_runtime.h"
  25. #include "predict/predict.h"
  26. #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
  27. #include "runtime/device/cpu/kernel_select_cpu.h"
  28. #include "backend/optimizer/common/optimizer.h"
  29. #include "backend/optimizer/common/pass_manager.h"
  30. #include "backend/optimizer/pass/replace_node_by_proxy.h"
  31. #ifdef ENABLE_DEBUGGER
  32. #include "debug/debugger/debugger.h"
  33. #endif
  34. #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  35. #include "frontend/parallel/ps/util.h"
  36. #endif
  37. namespace mindspore {
  38. namespace session {
  39. ParameterPtr CPUSession::CreateNewParameterFromParameter(const AnfNodePtr &anf, bool valid_input, KernelGraph *graph) {
  40. MS_EXCEPTION_IF_NULL(anf);
  41. MS_EXCEPTION_IF_NULL(graph);
  42. if (!anf->isa<Parameter>()) {
  43. MS_LOG(EXCEPTION) << "anf[" << anf->DebugString() << "] is not a parameter";
  44. }
  45. auto valid_inputs = graph->MutableValidInputs();
  46. MS_EXCEPTION_IF_NULL(valid_inputs);
  47. auto graph_inputs = graph->MutableInputs();
  48. MS_EXCEPTION_IF_NULL(graph_inputs);
  49. TraceManager::DebugTrace(std::make_shared<TraceCopy>(anf->debug_info()));
  50. ParameterPtr new_parameter = graph->NewParameter(anf->cast<ParameterPtr>());
  51. TraceManager::EndTrace();
  52. graph_inputs->push_back(new_parameter);
  53. valid_inputs->push_back(valid_input);
  54. return new_parameter;
  55. }
  56. void CPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
  57. auto optimizer = std::make_shared<opt::GraphOptimizer>();
  58. auto pm = std::make_shared<opt::PassManager>();
  59. std::string pass_name = "replace_node_by_proxy";
  60. pass_name.append(std::to_string(graph_sum_));
  61. pm->AddPass(std::make_shared<opt::ReplaceNodeByProxy>(pass_name));
  62. optimizer->AddPassManager(pm);
  63. (void)optimizer->Optimize(kernel_graph);
  64. kernel_graph->SetExecOrderByDefault();
  65. }
  66. GraphId CPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
  67. auto graph_id = graph_sum_;
  68. auto graph = ConstructKernelGraph(lst, outputs);
  69. MS_EXCEPTION_IF_NULL(graph);
  70. MS_LOG(INFO) << "Set kernel info";
  71. SetKernelInfo(graph.get());
  72. #if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  73. AssignParamKey(graph);
  74. if (parallel::ps::Util::IsRoleOfWorker()) {
  75. Optimize(graph);
  76. }
  77. #endif
  78. predictmodel::StepConvertGraph(graph);
  79. MS_LOG(INFO) << "Build kernel";
  80. BuildKernel(graph.get());
  81. MS_LOG(INFO) << "Assign kernel address";
  82. runtime_.AssignKernelAddress(graph.get());
  83. return graph_id;
  84. }
// Execute a previously compiled graph: bind inputs/outputs, run every kernel
// in execution order, sync outputs back, and handle summary/debugger hooks.
// Throws (MS_LOG(EXCEPTION)) when the runtime reports failure.
void CPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
  // Look up the compiled graph for this id (inserted by CompileGraph).
  auto &kernel_graph = graphs_[graph_id];
  MS_EXCEPTION_IF_NULL(kernel_graph);
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
  // Initialize parameter server
  if (!ps_init_) {
    InitPSParamAndOptim(kernel_graph, inputs);
  }
#endif
  MS_LOG(INFO) << "Bind input output address";
  // Outputs that live on device memory and must be copied back after the run.
  std::vector<tensor::TensorPtr> need_sync_outputs;
  runtime_.BindInputOutput(kernel_graph.get(), inputs, outputs, &need_sync_outputs);
  MS_LOG(INFO) << "Run graph start";
  predictmodel::StepConvertWeight(inputs);
  // Reorder the kernel list before execution, then write it back.
  auto execution_order = kernel_graph->execution_order();
  Reorder(&execution_order);
  bool enable_summary = summary_callback_ != nullptr;
  kernel_graph->set_execution_order(execution_order);
  NamedSummaryOutputs summary_outputs;
  if (enable_summary) {
    // Pin summary tensors so their buffers survive until Summary() reads them.
    GetSummaryNodes(kernel_graph.get());
    summary_outputs = kernel_graph->summary_nodes();
    runtime_.IncreaseSummaryRefCount(summary_outputs);
  }
#ifdef ENABLE_DEBUGGER
  // debugger pre-execution processing
  if (debugger_) {
    debugger_->PreExecute(kernel_graph);
  }
#endif
  bool ret = runtime_.Run(kernel_graph.get());
  if (!ret) {
    MS_LOG(EXCEPTION) << "Run graph failed";
  }
  // Copy device-resident results back into the host tensors.
  for (auto output : need_sync_outputs) {
    (void)output->data_sync();
  }
  if (enable_summary) {
    // Emit summaries, then release the extra refs taken above.
    Summary(kernel_graph.get());
    runtime_.DecreaseSummaryRefCount(summary_outputs);
  }
#ifdef ENABLE_DEBUGGER
  // debugger post-execution processing
  if (debugger_) {
    debugger_->PostExecute();
  }
#endif
  MS_LOG(INFO) << "Run graph end";
}
  134. void CPUSession::SetKernelInfo(const KernelGraph *kernel_graph) {
  135. MS_EXCEPTION_IF_NULL(kernel_graph);
  136. auto &kernel_nodes = kernel_graph->execution_order();
  137. for (const auto &kernel_node : kernel_nodes) {
  138. MS_EXCEPTION_IF_NULL(kernel_node);
  139. device::cpu::SetKernelInfo(kernel_node);
  140. }
  141. }
  142. namespace {
  143. void KernelNotSupportException(const AnfNodePtr &kernel_node) {
  144. std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  145. std::stringstream operator_info;
  146. operator_info << "Operator[" << kernel_name << "] ";
  147. auto kernel_info = dynamic_cast<device::KernelInfo *>(kernel_node->kernel_info());
  148. if (kernel_info == nullptr) {
  149. operator_info << "is not support.";
  150. MS_LOG(EXCEPTION) << operator_info.str();
  151. }
  152. auto kernel_build_Info = kernel_info->select_kernel_build_info();
  153. if (kernel_build_Info == nullptr) {
  154. operator_info << "is not support.";
  155. MS_LOG(EXCEPTION) << operator_info.str();
  156. }
  157. size_t input_num = kernel_build_Info->GetInputNum();
  158. if (input_num > 0) {
  159. operator_info << " input(";
  160. for (size_t i = 0; i < input_num; ++i) {
  161. operator_info << TypeIdLabel(kernel_build_Info->GetInputDeviceType(i));
  162. if (i != input_num - 1) {
  163. operator_info << ",";
  164. }
  165. }
  166. operator_info << ") ";
  167. }
  168. size_t output_num = kernel_build_Info->GetOutputNum();
  169. if (output_num > 0) {
  170. operator_info << "output(";
  171. for (size_t i = 0; i < output_num; ++i) {
  172. operator_info << TypeIdLabel(kernel_build_Info->GetOutputDeviceType(i));
  173. if (i != kernel_build_Info->GetOutputNum() - 1) {
  174. operator_info << ",";
  175. }
  176. }
  177. operator_info << ") ";
  178. }
  179. operator_info << "is not support.";
  180. MS_LOG(EXCEPTION) << operator_info.str();
  181. }
  182. } // namespace
  183. void CPUSession::BuildKernel(const KernelGraph *kernel_graph) {
  184. MS_EXCEPTION_IF_NULL(kernel_graph);
  185. auto &kernel_nodes = kernel_graph->execution_order();
  186. for (const auto &kernel_node : kernel_nodes) {
  187. MS_EXCEPTION_IF_NULL(kernel_node);
  188. std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
  189. MS_LOG(INFO) << "Cpu building operator[" << kernel_name << "].";
  190. std::shared_ptr<kernel::CPUKernel> cpu_kernel =
  191. kernel::CPUKernelFactory::GetInstance().Create(kernel_name, kernel_node);
  192. if (cpu_kernel == nullptr) {
  193. KernelNotSupportException(kernel_node);
  194. }
  195. cpu_kernel->Init(kernel_node);
  196. AnfAlgo::SetKernelMod(cpu_kernel, kernel_node.get());
  197. MS_LOG(INFO) << "Cpu build success operator[" << kernel_name << "].";
  198. }
  199. }
  200. } // namespace session
  201. } // namespace mindspore